<?php
# Copyright (C) 2005 Brion Vibber <brion@pobox.com>
# http://www.mediawiki.org/
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# http://www.gnu.org/copyleft/gpl.html

/**
 * Finds rows in the user table that share the same user_name and, on
 * request, merges their edits into one record and deletes the extras, so
 * that a unique index on user_name can be applied afterwards.
 *
 * Progress and results are reported through wfOut().
 */
class UserDupes {
    /** Database handle supplied to the constructor; all queries go through it. */
    public $db;

    /** Number of duplicate records that were found to have edits attached. */
    public $reassigned = 0;

    /** Number of duplicate records that were (or, in a dry run, could be) deleted. */
    public $trimmed = 0;

    /** Number of duplicate records that still had edits after reassignment. */
    public $failed = 0;

    /**
     * @param object $database Database handle used for every query.
     */
    public function __construct( $database ) {
        $this->db = $database;
    }

    /**
     * Check whether the user table's user_name index is already unique.
     *
     * Emits a warning and returns false when no index information is
     * returned for user_name at all.
     *
     * @return bool True if the index exists and is unique.
     */
    public function hasUniqueIndex() {
        $info = $this->db->indexInfo( 'user', 'user_name', __METHOD__ );
        if ( !$info ) {
            wfOut( "WARNING: doesn't seem to have user_name index at all!\n" );
            return false;
        }

        # Counter-intuitively, 'Non_unique' is 0 for *unique* indexes and
        # 1 for *non-unique* ones. The comparison is deliberately loose
        # because the value may arrive as a string.
        return ( $info[0]->Non_unique == 0 );
    }

    /**
     * Check for duplicate user records and clean them up: reassign edits
     * from the extra records and delete those records.
     *
     * @return bool Same result as checkDupes( true ).
     */
    public function clearDupes() {
        return $this->checkDupes( true );
    }

    /**
     * Check the user table for rows sharing a user_name and report on them;
     * with $doDelete set, also reassign their edits and delete the extras.
     *
     * The affected tables are locked for the duration of the scan.
     *
     * @param bool $doDelete Pass true to actually modify the database;
     *                       false (the default) only reports.
     * @return bool True if the index is already unique or it is now safe to
     *              apply it; false if duplicates remain or a cleanup failed.
     */
    public function checkDupes( $doDelete = false ) {
        if ( $this->hasUniqueIndex() ) {
            wfOut( wfWikiID() . " already has a unique index on its user table.\n" );
            return true;
        }

        $this->lock();

        wfOut( "Checking for duplicate accounts...\n" );
        $dupes = $this->getDupes();
        $count = count( $dupes );

        wfOut( "Found $count accounts with duplicate records on " . wfWikiID() . ".\n" );
        $this->trimmed = 0;
        $this->reassigned = 0;
        $this->failed = 0;
        foreach ( $dupes as $name ) {
            $this->examine( $name, $doDelete );
        }

        $this->unlock();

        wfOut( "\n" );

        if ( $this->reassigned > 0 ) {
            if ( $doDelete ) {
                wfOut( "{$this->reassigned} duplicate accounts had edits reassigned to a canonical record id.\n" );
            } else {
                wfOut( "{$this->reassigned} duplicate accounts need to have edits reassigned.\n" );
            }
        }

        if ( $this->trimmed > 0 ) {
            if ( $doDelete ) {
                wfOut( "{$this->trimmed} duplicate user records were deleted from " . wfWikiID() . ".\n" );
            } else {
                wfOut( "{$this->trimmed} duplicate user accounts were found on " . wfWikiID() . " which can be removed safely.\n" );
            }
        }

        if ( $this->failed > 0 ) {
            wfOut( "Something terribly awry; {$this->failed} duplicate accounts were not removed.\n" );
            return false;
        }

        if ( $this->trimmed == 0 || $doDelete ) {
            wfOut( "It is now safe to apply the unique index on user_name.\n" );
            return true;
        }

        wfOut( "Run this script again with the --fix option to automatically delete them.\n" );
        return false;
    }

    /**
     * Take WRITE locks on the user table and on the edit tables for the
     * schema in use (revision, or cur and old).
     */
    public function lock() {
        if ( $this->newSchema() ) {
            $set = array( 'user', 'revision' );
        } else {
            $set = array( 'user', 'cur', 'old' );
        }
        $names = array_map( array( $this, 'lockTable' ), $set );
        $tables = implode( ',', $names );

        $this->db->query( "LOCK TABLES $tables", __METHOD__ );
    }

    /**
     * Build the LOCK TABLES clause for a single table.
     *
     * @param string $table Unqualified table name.
     * @return string The database's name for the table followed by ' WRITE'.
     */
    public function lockTable( $table ) {
        return $this->db->tableName( $table ) . ' WRITE';
    }

    /**
     * Decide which edit tables to work on.
     *
     * @return bool True when a class named Revision exists, in which case
     *              the revision table is used instead of cur and old.
     */
    public function newSchema() {
        return class_exists( 'Revision' );
    }

    /**
     * Release the locks taken by lock().
     */
    public function unlock() {
        $this->db->query( "UNLOCK TABLES", __METHOD__ );
    }

    /**
     * List every user_name that occurs on more than one row of the user table.
     *
     * @return array List of duplicated user names.
     */
    public function getDupes() {
        $user = $this->db->tableName( 'user' );
        $result = $this->db->query(
            "SELECT user_name,COUNT(*) AS n
                FROM $user
            GROUP BY user_name
              HAVING n > 1", __METHOD__ );

        $list = array();
        while ( $row = $this->db->fetchObject( $result ) ) {
            $list[] = $row->user_name;
        }
        $this->db->freeResult( $result );

        return $list;
    }

    /**
     * Examine all user records carrying the given name.
     *
     * The first row the database returns is treated as the record to keep;
     * no explicit ordering is requested. Every further row is checked for
     * edits. With $doDelete set, those edits are reassigned to the kept
     * record and the extra row is deleted; if edits still remain after
     * reassignment the row is left in place and counted as failed.
     *
     * Updates the $reassigned, $trimmed and $failed counters.
     *
     * @param string $name     User name to examine.
     * @param bool   $doDelete True to modify the database, false to report only.
     */
    public function examine( $name, $doDelete ) {
        $result = $this->db->select( 'user',
            array( 'user_id' ),
            array( 'user_name' => $name ),
            __METHOD__ );

        $firstRow = $this->db->fetchObject( $result );
        if ( !$firstRow ) {
            # No record carries this name, so there is nothing to compare.
            $this->db->freeResult( $result );
            return;
        }
        $firstId = $firstRow->user_id;
        wfOut( "Record that will be used for '$name' is user_id=$firstId\n" );

        while ( $row = $this->db->fetchObject( $result ) ) {
            $dupeId = $row->user_id;
            wfOut( "... dupe id $dupeId: " );
            $edits = $this->editCount( $dupeId );
            if ( $edits > 0 ) {
                $this->reassigned++;
                wfOut( "has $edits edits! " );
                if ( $doDelete ) {
                    $this->reassignEdits( $dupeId, $firstId );
                    # Re-count to confirm the reassignment really emptied this id.
                    $newEdits = $this->editCount( $dupeId );
                    if ( $newEdits == 0 ) {
                        wfOut( "confirmed cleaned. " );
                    } else {
                        $this->failed++;
                        wfOut( "WARNING! $newEdits remaining edits for $dupeId; NOT deleting user.\n" );
                        continue;
                    }
                } else {
                    wfOut( "(will need to reassign edits on fix)" );
                }
            } else {
                wfOut( "ok, no edits. " );
            }
            $this->trimmed++;
            if ( $doDelete ) {
                $this->trimAccount( $dupeId );
            }
            wfOut( "\n" );
        }
        $this->db->freeResult( $result );
    }

    /**
     * Count the edits attributed to a user id across the edit tables for
     * the schema in use.
     *
     * @param int $userid User id to count edits for.
     * @return int Total number of matching rows.
     */
    public function editCount( $userid ) {
        if ( $this->newSchema() ) {
            return $this->editCountOn( 'revision', 'rev_user', $userid );
        }

        return $this->editCountOn( 'cur', 'cur_user', $userid ) +
            $this->editCountOn( 'old', 'old_user', $userid );
    }

    /**
     * Count the rows of one table whose user field equals the given id.
     *
     * @param string $table  Table to count in.
     * @param string $field  Column holding the user id.
     * @param int    $userid User id to match.
     * @return int Row count, converted with intval().
     */
    public function editCountOn( $table, $field, $userid ) {
        return intval( $this->db->selectField(
            $table,
            'COUNT(*)',
            array( $field => $userid ),
            __METHOD__ ) );
    }

    /**
     * Move all edits from one user id to another on every edit table for
     * the schema in use.
     *
     * @param int $from User id currently attached to the edits.
     * @param int $to   User id the edits should be attached to.
     */
    public function reassignEdits( $from, $to ) {
        $set = $this->newSchema()
            ? array( 'revision' => 'rev_user' )
            : array( 'cur' => 'cur_user', 'old' => 'old_user' );
        foreach ( $set as $table => $field ) {
            $this->reassignEditsOn( $table, $field, $from, $to );
        }
    }

    /**
     * Move all edits from one user id to another on a single table.
     *
     * @param string $table Table to update.
     * @param string $field Column holding the user id.
     * @param int    $from  User id to replace.
     * @param int    $to    Replacement user id.
     */
    public function reassignEditsOn( $table, $field, $from, $to ) {
        wfOut( "reassigning on $table... " );
        $this->db->update( $table,
            array( $field => $to ),
            array( $field => $from ),
            __METHOD__ );
        wfOut( "ok. " );
    }

    /**
     * Delete a single row from the user table.
     *
     * @param int $userid user_id of the row to delete.
     */
    public function trimAccount( $userid ) {
        wfOut( "deleting..." );
        $this->db->delete( 'user', array( 'user_id' => $userid ), __METHOD__ );
        wfOut( " ok" );
    }
}