00001 <?php
/**
 * Refresh link or redirect table entries for a range of pages.
 *
 * One of three modes is used, checked in this order:
 *  - $oldRedirectsOnly: pages flagged page_is_redirect=1 that have no row in
 *    the redirect table are passed to fixRedirect().
 *  - $newOnly: pages with page_is_new=1 and page_id >= $start are processed.
 *    $end is not applied in this mode.
 *  - otherwise: every page_id from $start to $end inclusive is processed; when
 *    $end is 0 the current max(page_id) is used.
 *
 * In the last two modes $redirectsOnly selects fixRedirect() over
 * fixLinksFromArticle(). Progress is printed, and wfWaitForSlaves( $maxLag )
 * is called, every $reportingInterval pages.
 *
 * @param $start int First page_id to process
 * @param $newOnly bool Restrict to pages marked as new
 * @param $maxLag mixed Value handed to wfWaitForSlaves()
 * @param $end int Last page_id to process, 0 for no explicit upper bound
 * @param $redirectsOnly bool Only fix redirects instead of refreshing links
 * @param $oldRedirectsOnly bool Only fix redirects missing from the redirect table
 */
function refreshLinks( $start, $newOnly = false, $maxLag = false, $end = 0, $redirectsOnly = false, $oldRedirectsOnly = false ) {
	global $wgUser, $wgParser, $wgUseTidy;

	$reportingInterval = 100;
	$fname = 'refreshLinks';
	$dbr = wfGetDB( DB_SLAVE );
	$start = intval( $start );
	# $end is interpolated into raw SQL below, so force it to an integer too
	$end = intval( $end );

	# Don't generate TeX PNGs (lack of a sensible current directory causes errors anyway)
	$wgUser->setOption('math', MW_MATH_SOURCE);

	# Don't generate extension images (e.g. Timeline)
	if( method_exists( $wgParser, "clearTagHooks" ) ) {
		$wgParser->clearTagHooks();
	}

	# Don't use HTML tidy
	$wgUseTidy = false;

	$what = $redirectsOnly ? "redirects" : "links";

	if( $oldRedirectsOnly ) {
		# This entire code path is cut-and-pasted from below.  Hurrah.
		$res = $dbr->query(
			"SELECT page_id ".
			"FROM page ".
			"LEFT JOIN redirect ON page_id=rd_from ".
			"WHERE page_is_redirect=1 AND rd_from IS NULL AND ".
			($end == 0 ? "page_id >= $start"
			           : "page_id BETWEEN $start AND $end"),
			$fname
		);
		$num = $dbr->numRows( $res );
		print "Refreshing $num old redirects from $start...\n";

		# Counter must start at zero; it was previously left undefined here
		$i = 0;
		while( $row = $dbr->fetchObject( $res ) ) {
			if ( !( ++$i % $reportingInterval ) ) {
				print "$i\n";
				wfWaitForSlaves( $maxLag );
			}
			fixRedirect( $row->page_id );
		}
	} elseif( $newOnly ) {
		print "Refreshing $what from ";
		$res = $dbr->select( 'page',
			array( 'page_id' ),
			array(
				'page_is_new' => 1,
				"page_id >= $start" ),
			$fname
		);
		$num = $dbr->numRows( $res );
		print "$num new articles...\n";

		$i = 0;
		while ( $row = $dbr->fetchObject( $res ) ) {
			if ( !( ++$i % $reportingInterval ) ) {
				print "$i\n";
				wfWaitForSlaves( $maxLag );
			}
			if($redirectsOnly)
				fixRedirect( $row->page_id );
			else
				fixLinksFromArticle( $row->page_id );
		}
	} else {
		print "Refreshing $what table.\n";
		if ( !$end ) {
			# No explicit upper bound: run up to the highest existing page_id
			$end = $dbr->selectField( 'page', 'max(page_id)', false );
		}
		print("Starting from page_id $start of $end.\n");

		for ($id = $start; $id <= $end; $id++) {

			if ( !($id % $reportingInterval) ) {
				print "$id\n";
				wfWaitForSlaves( $maxLag );
			}
			if($redirectsOnly)
				fixRedirect( $id );
			else
				fixLinksFromArticle( $id );
		}
	}
}
00093
/**
 * Update the stored redirect target for one page.
 *
 * Sets the globals $wgTitle and $wgArticle for the page. Does nothing further
 * when no title exists for $id, or when followRedirect() yields false or a
 * non-object.
 *
 * @param $id int page_id of the page to fix
 */
function fixRedirect( $id ){
	global $wgTitle, $wgArticle;

	$wgTitle = Title::newFromID( $id );
	$master = wfGetDB( DB_MASTER );

	if ( !is_null( $wgTitle ) ) {
		$wgArticle = new Article($wgTitle);
		$target = $wgArticle->followRedirect();

		# Only an object result is written back as the redirect target
		if ( is_object( $target ) && $target != false ) {
			$wgArticle->updateRedirectOn( $master, $target );
		}
	}
}
00112
/**
 * Re-parse one page and rebuild its link table entries.
 *
 * Sets the global $wgTitle, clears the LinkCache singleton, parses the text of
 * the revision returned by Revision::newFromTitle() and runs a LinksUpdate on
 * the parser output inside a master transaction. Returns without updating
 * when no title exists for $id or no revision is found.
 *
 * @param $id int page_id of the page to refresh
 */
function fixLinksFromArticle( $id ) {
	global $wgTitle, $wgParser;

	$wgTitle = Title::newFromID( $id );
	$dbw = wfGetDB( DB_MASTER );

	$linkCache =& LinkCache::singleton();
	$linkCache->clear();

	if ( is_null( $wgTitle ) ) {
		return;
	}
	$dbw->begin();

	$revision = Revision::newFromTitle( $wgTitle );
	if ( !$revision ) {
		# Nothing to parse: close the transaction opened above rather than
		# leaving it dangling for the next caller
		$dbw->immediateCommit();
		return;
	}

	$options = new ParserOptions;
	$parserOutput = $wgParser->parse( $revision->getText(), $wgTitle, $options, true, true, $revision->getId() );
	$update = new LinksUpdate( $wgTitle, $parserOutput, false );
	$update->doUpdate();
	$dbw->immediateCommit();
}
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
/**
 * Delete link table rows whose source page no longer exists.
 *
 * For each of pagelinks, imagelinks, categorylinks, templatelinks and
 * externallinks, the distinct "from" ids that have no matching page row
 * (LEFT JOIN page, page_id null) are read over a separate unbuffered slave
 * connection and deleted from the master in batches.
 *
 * @param $maxLag int Value handed to wfWaitForSlaves() before starting
 * @param $batchSize int Number of ids per DELETE; values below 1 are treated as 1
 */
function deleteLinksFromNonexistent( $maxLag = 0, $batchSize = 100 ) {
	wfWaitForSlaves( $maxLag );

	# A zero or negative batch size would make the modulo below divide by zero
	$batchSize = max( 1, intval( $batchSize ) );

	$dbw = wfGetDB( DB_MASTER );

	# Dedicated load balancer so the unbuffered read does not share the
	# connection returned by wfGetDB()
	$lb = wfGetLBFactory()->newMainLB();
	$dbr = $lb->getConnection( DB_SLAVE );
	$dbr->bufferResults( false );

	$linksTables = array(
		'pagelinks' => 'pl_from',
		'imagelinks' => 'il_from',
		'categorylinks' => 'cl_from',
		'templatelinks' => 'tl_from',
		'externallinks' => 'el_from',
	);

	foreach ( $linksTables as $table => $field ) {
		print "Retrieving illegal entries from $table... ";

		# SELECT DISTINCT( $field ) FROM $table LEFT JOIN page ON $field=page_id WHERE page_id IS NULL;
		$results = $dbr->select( array( $table, 'page' ),
			$field,
			array('page_id' => null ),
			__METHOD__,
			'DISTINCT',
			array( 'page' => array( 'LEFT JOIN', "$field=page_id"))
		);

		$counter = 0;
		$list = array();
		print "0..";

		foreach( $results as $row ) {
			$counter++;
			$list[] = $row->$field;
			if ( ( $counter % $batchSize ) == 0 ) {
				# NOTE(review): waits with a fixed value of 5 rather than
				# $maxLag -- confirm whether that is intended
				wfWaitForSlaves(5);
				$dbw->delete( $table, array( $field => $list ), __METHOD__ );

				print $counter . "..";
				$list = array();
			}
		}

		print $counter;
		# Flush the final partial batch, if any
		if (count($list) > 0) {
			$dbw->delete( $table, array( $field => $list ), __METHOD__ );
		}

		print "\n";
	}

	$lb->closeAll();
}