00001 <?php
00002
00003
00004
00005
// Compatibility shim: make sure a global XMLREADER_<NAME> constant exists for
// every XMLReader::<NAME> class constant listed below, so the rest of the file
// can use the global spelling regardless of which form the installed
// XMLReader extension provides.
$xmlReaderConstants = array(
	// Node types
	"NONE",
	"ELEMENT",
	"ATTRIBUTE",
	"TEXT",
	"CDATA",
	"ENTITY_REF",
	"ENTITY",
	"PI",
	"COMMENT",
	"DOC",
	"DOC_TYPE",
	"DOC_FRAGMENT",
	"NOTATION",
	"WHITESPACE",
	"SIGNIFICANT_WHITESPACE",
	"END_ELEMENT",
	"END_ENTITY",
	"XML_DECLARATION",
	// Parser options
	"LOADDTD",
	"DEFAULTATTRS",
	"VALIDATE",
	"SUBST_ENTITIES",
);

foreach ( $xmlReaderConstants as $name ) {
	$fullName = "XMLREADER_$name";
	$newName = "XMLReader::$name";

	// Skip when the global constant already exists, or when there is no
	// class constant to alias it to (in which case nothing can be done).
	if ( defined( $fullName ) || !defined( $newName ) ) {
		continue;
	}

	define( $fullName, constant( $newName ) );
}
00040
/**
 * Forward-only reader that pulls revision text out of an XML dump file laid
 * out as <mediawiki> -> <page> -> <revision> -> <text>, where the first <id>
 * child of a page / revision carries its numeric ID.
 *
 * The underlying XMLReader is never rewound, so prefetch() requests must be
 * made in ascending (page, revision) order; a request for something the
 * reader has already moved past yields null.
 */
class BaseDump {
	/** @var XMLReader|null Streaming reader over the dump file */
	public $reader = null;
	/** @var bool True once the input is exhausted, closed, or failed to open */
	public $atEnd = false;
	/** @var bool True once the closing tag of the current page was consumed */
	public $atPageEnd = false;
	/** @var int ID of the page most recently entered by nextPage() */
	public $lastPage = 0;
	/** @var int ID of the revision most recently entered by nextRev() */
	public $lastRev = 0;

	/**
	 * Open the dump file for sequential reading.
	 *
	 * If the file cannot be opened the object is put straight into the
	 * "at end" state, so every later prefetch() simply returns null.
	 *
	 * @param string $infile Path or URI of the XML dump to read
	 */
	public function __construct( $infile ) {
		$this->reader = new XMLReader();
		if ( !$this->reader->open( $infile ) ) {
			$this->debug( "BaseDump::__construct could not open $infile" );
			$this->atEnd = true;
		}
	}

	/**
	 * Legacy PHP4-style constructor name, kept so existing callers such as
	 * parent::BaseDump( $infile ) keep working. PHP 8 no longer treats a
	 * method named after its class as a constructor, hence __construct().
	 *
	 * @param string $infile Path or URI of the XML dump to read
	 */
	public function BaseDump( $infile ) {
		$this->__construct( $infile );
	}

	/**
	 * Fetch the text of the given revision of the given page, advancing the
	 * reader as far as necessary.
	 *
	 * @param int|string $page Page ID (converted with intval())
	 * @param int|string $rev Revision ID (converted with intval())
	 * @return string|null Revision text, or null when the revision is not
	 *   found, has no <text> element, or lies behind the reader's position
	 */
	public function prefetch( $page, $rev ) {
		$page = intval( $page );
		$rev = intval( $rev );

		// Advance page by page until the requested page (or the end of input).
		while ( $this->lastPage < $page && !$this->atEnd ) {
			$this->debug( "BaseDump::prefetch at page $this->lastPage, looking for $page" );
			$this->nextPage();
		}
		if ( $this->lastPage > $page || $this->atEnd ) {
			$this->debug( "BaseDump::prefetch already past page $page looking for rev $rev [$this->lastPage, $this->lastRev]" );
			return null;
		}

		// Advance revision by revision within the current page.
		while ( $this->lastRev < $rev && !$this->atEnd && !$this->atPageEnd ) {
			$this->debug( "BaseDump::prefetch at page $this->lastPage, rev $this->lastRev, looking for $page, $rev" );
			$this->nextRev();
		}

		// Once the page's closing tag has been consumed, any further read
		// would run into the following page, so that is never a hit.
		if ( $this->lastRev === $rev && !$this->atEnd && !$this->atPageEnd ) {
			$this->debug( "BaseDump::prefetch hit on $page, $rev [$this->lastPage, $this->lastRev]" );
			return $this->nextText();
		}

		$this->debug( "BaseDump::prefetch already past rev $rev on page $page [$this->lastPage, $this->lastRev]" );
		return null;
	}

	/**
	 * Send a message, with a newline appended, to wfDebug().
	 *
	 * @param string $str Message text
	 */
	public function debug( $str ) {
		wfDebug( $str . "\n" );
	}

	/**
	 * Move to the next <page> and record its ID; resets the per-page
	 * revision state. Marks the reader as finished (and closes it) when no
	 * further page exists.
	 */
	public function nextPage() {
		if ( $this->skipTo( 'page', 'mediawiki' ) ) {
			if ( $this->skipTo( 'id' ) ) {
				$this->lastPage = intval( $this->nodeContents() );
				$this->lastRev = 0;
				$this->atPageEnd = false;
			}
		} elseif ( !$this->atEnd ) {
			// Stopped on </mediawiki>: nothing left to read, so release the
			// reader now. close() also sets atEnd.
			$this->close();
		}
	}

	/**
	 * Move to the next <revision> of the current page and record its ID.
	 * Sets atPageEnd when the page's closing tag is reached instead.
	 */
	public function nextRev() {
		if ( $this->skipTo( 'revision' ) && $this->skipTo( 'id' ) ) {
			$this->lastRev = intval( $this->nodeContents() );
		} elseif ( !$this->atEnd ) {
			// Either search stopped on </page> (the default parent).
			$this->atPageEnd = true;
		}
	}

	/**
	 * Return the contents of the next <text> element of the current page.
	 *
	 * @return string|null The text, or null when the page (or the input)
	 *   ends before a <text> element is found
	 */
	public function nextText() {
		if ( !$this->skipTo( 'text' ) ) {
			// Do not call nodeContents() here: the reader is no longer
			// positioned on a <text> element and would collect unrelated
			// text from whatever follows.
			if ( !$this->atEnd ) {
				$this->atPageEnd = true;
			}
			return null;
		}
		return strval( $this->nodeContents() );
	}

	/**
	 * Read forward until an opening tag called $name is found.
	 *
	 * @param string $name Element name to stop on
	 * @param string $parent Element whose closing tag aborts the search
	 * @return bool|null True when positioned on <$name>; false when
	 *   </$parent> was reached first or the reader is already at the end;
	 *   null when the input ran out (the reader is then closed)
	 */
	public function skipTo( $name, $parent = 'page' ) {
		if ( $this->atEnd ) {
			return false;
		}
		while ( $this->reader->read() ) {
			if ( $this->reader->nodeType === XMLREADER_ELEMENT &&
				$this->reader->name === $name ) {
				return true;
			}
			if ( $this->reader->nodeType === XMLREADER_END_ELEMENT &&
				$this->reader->name === $parent ) {
				$this->debug( "BaseDump::skipTo found </$parent> searching for <$name>" );
				return false;
			}
		}
		return $this->close();
	}

	/**
	 * Collect the character data of the element the reader is positioned on,
	 * reading up to the next closing tag.
	 *
	 * Text, CDATA and significant-whitespace nodes are concatenated; all
	 * other node types are skipped.
	 *
	 * @return string|null The collected text ("" for an empty element), or
	 *   null when the reader is at the end or the input runs out first
	 */
	public function nodeContents() {
		if ( $this->atEnd ) {
			return null;
		}
		if ( $this->reader->isEmptyElement ) {
			return "";
		}
		$buffer = "";
		while ( $this->reader->read() ) {
			switch ( $this->reader->nodeType ) {
				case XMLREADER_TEXT:
				case XMLREADER_CDATA:
				case XMLREADER_SIGNIFICANT_WHITESPACE:
					$buffer .= $this->reader->value;
					break;
				case XMLREADER_END_ELEMENT:
					return $buffer;
			}
		}
		return $this->close();
	}

	/**
	 * Close the underlying reader and mark this object as finished.
	 *
	 * @return null Always null, so callers can write "return $this->close();"
	 */
	public function close() {
		$this->reader->close();
		$this->atEnd = true;
		return null;
	}
}