<?php

# Describes one family of multi-language sites (for example all Wikipedias):
# the database-name suffix its wikis share, the interwiki prefix used for
# lateral links to it, and the base hostname its language subdomains live under.
class Site {
	var $suffix, $lateral, $url;

	# $s: database suffix, e.g. 'wiki'
	# $l: lateral interwiki prefix, e.g. 'w'
	# $u: base hostname, e.g. 'wikipedia.org'
	function __construct( $s, $l, $u ) {
		$this->suffix = $s;
		$this->lateral = $l;
		$this->url = $u;
	}

	# Returns the article URL pattern for the given language of this site.
	# Underscores in the language code become hyphens in the hostname;
	# the literal "$1" placeholder is left in the result for the page title.
	function getURL( $lang ) {
		$xlang = str_replace( '_', '-', $lang );
		return "http://$xlang.{$this->url}/wiki/\$1";
	}
}

# Reads a list file and returns its lines with surrounding whitespace removed.
# Blank lines are dropped so that they can never become an empty language
# prefix or an empty database name. Dies if the file cannot be read, because
# continuing with an empty list would generate SQL that truncates the
# interwiki tables without refilling the language links.
function readInterwikiListFile( $path ) {
	$rawLines = file( $path );
	if ( $rawLines === false ) {
		wfDie( "Unable to read $path\n" );
	}
	return array_values( array_filter( array_map( 'trim', $rawLines ), 'strlen' ) );
}

# Parses the lines of the raw Interwiki_map page into interwiki rows.
# $lines: array of already-trimmed lines of the page
# $reserved: array whose keys are prefixes that must not be taken from the map
# Returns an array keyed by lower-cased prefix; each value is
# array( 'iw_prefix' => ..., 'iw_url' => ..., 'iw_local' => 0|1 ).
function parseInterwikiMap( $lines, $reserved ) {
	$iwArray = array();

	foreach ( $lines as $line ) {
		$matches = array();
		# Only table rows of the form "| prefix || http(s)://url" are of interest
		if ( !preg_match( '/^\|\s*(.*?)\s*\|\|\s*(https?:\/\/.*?)\s*$/', $line, $matches ) ) {
			continue;
		}
		$prefix = strtolower( $matches[1] );
		$url = $matches[2];

		# URLs on one of the project domains are flagged as local. The list
		# covers every family in $sites of makeInterwikiSQL(), including
		# wikinews and wikiversity, whose lateral links are also written
		# with iw_local = 1.
		if ( preg_match( '/(wikipedia|wiktionary|wikisource|wikiquote|wikibooks|wikimedia|wikinews|wikiversity)\.org/', $url ) ) {
			$local = 1;
		} else {
			$local = 0;
		}

		if ( empty( $reserved[$prefix] ) ) {
			$iwArray[$prefix] = array( "iw_prefix" => $prefix, "iw_url" => $url, "iw_local" => $local );
		}
	}

	return $iwArray;
}

# Writes one "$destDir/<dbname>.sql" file per database listed in all.dblist.
# Each file truncates the interwiki table of that database and refills it with
# intermap links, lateral links, interlanguage links and a few extra links.
# Sets the globals $langlist, $languageAliases and $prefixRewrites, which
# makeLanguageLinks() and makeLink() read.
function makeInterwikiSQL( $destDir ) {
	global $langlist, $languageAliases, $prefixRewrites;

	# Multi-language sites
	# db suffix => db suffix, iw prefix, hostname
	$sites = array(
		'wiki' => new Site( 'wiki', 'w', 'wikipedia.org' ),
		'wiktionary' => new Site( 'wiktionary', 'wikt', 'wiktionary.org' ),
		'wikiquote' => new Site( 'wikiquote', 'q', 'wikiquote.org' ),
		'wikibooks' => new Site( 'wikibooks', 'b', 'wikibooks.org' ),
		'wikinews' => new Site( 'wikinews', 'n', 'wikinews.org' ),
		'wikisource' => new Site( 'wikisource', 's', 'wikisource.org' ),
		'wikimedia' => new Site( 'wikimedia', 'chapter', 'wikimedia.org' ),
		'wikiversity' => new Site( 'wikiversity', 'v', 'wikiversity.org' ),
	);

	# List of language prefixes likely to be found in multi-language sites
	$langlist = readInterwikiListFile( "/home/wikipedia/common/langlist" );

	# List of all database names
	$dblist = readInterwikiListFile( "/home/wikipedia/common/all.dblist" );

	# Special-case hostnames
	$specials = array(
		'sourceswiki' => 'sources.wikipedia.org',
		'quotewiki' => 'wikiquote.org',
		'textbookwiki' => 'wikibooks.org',
		'sep11wiki' => 'sep11.wikipedia.org',
		'metawiki' => 'meta.wikimedia.org',
		'commonswiki' => 'commons.wikimedia.org',
		'specieswiki' => 'species.wikimedia.org',
	);

	# Extra interwiki links that can't be in the intermap for some reason
	$extraLinks = array(
		array( 'm', 'http://meta.wikimedia.org/wiki/$1', 1 ),
		array( 'meta', 'http://meta.wikimedia.org/wiki/$1', 1 ),
		array( 'sep11', 'http://sep11.wikipedia.org/wiki/$1', 1 ),
	);

	# Language aliases, usually configured as redirects to the real wiki in apache
	# Interlanguage links are made directly to the real wiki
	# Something horrible happens if you forget to list an alias here, I can't
	# remember what
	$languageAliases = array(
		'zh-cn' => 'zh',
		'zh-tw' => 'zh',
		'dk' => 'da',
		'nb' => 'no',
	);

	# Special case prefix rewrites, for the benefit of Swedish which uses s:t
	# as an abbreviation for saint
	$prefixRewrites = array(
		'svwiki' => array( 's' => 'src' ),
	);

	# Construct a list of reserved prefixes: language codes, language aliases
	# and lateral prefixes may not be overridden by the intermap
	$reserved = array();
	foreach ( $langlist as $lang ) {
		$reserved[$lang] = 1;
	}
	foreach ( $languageAliases as $alias => $lang ) {
		$reserved[$alias] = 1;
	}
	foreach ( $sites as $site ) {
		$reserved[$site->lateral] = 1;
	}

	# Extract the intermap from meta
	$intermap = Http::get( 'http://meta.wikimedia.org/w/index.php?title=Interwiki_map&action=raw', 30 );
	$lines = array_map( 'trim', explode( "\n", trim( (string)$intermap ) ) );

	if ( !$lines || count( $lines ) < 2 ) {
		wfDie( "m:Interwiki_map not found" );
	}

	$iwArray = parseInterwikiMap( $lines, $reserved );

	foreach ( $dblist as $db ) {
		$sql = "-- Generated by rebuildInterwiki.php";
		if ( isset( $specials[$db] ) ) {
			# Special wiki
			# Has interwiki links and interlanguage links to wikipedia

			$host = $specials[$db];
			$sql .= "\n--$host\n\n";
			$sql .= "USE $db;\n" .
				"TRUNCATE TABLE interwiki;\n" .
				"INSERT INTO interwiki (iw_prefix, iw_url, iw_local) VALUES \n";
			# $first is passed by reference; makeLink() clears it after the
			# first row so that later rows are preceded by a comma
			$first = true;

			# Intermap links
			foreach ( $iwArray as $iwEntry ) {
				$sql .= makeLink( $iwEntry, $first, $db );
			}

			# Links to multilanguage sites
			foreach ( $sites as $targetSite ) {
				$sql .= makeLink( array( $targetSite->lateral, $targetSite->getURL( 'en' ), 1 ), $first, $db );
			}

			# Interlanguage links to wikipedia
			$sql .= makeLanguageLinks( $sites['wiki'], $first, $db );

			# Extra links
			foreach ( $extraLinks as $link ) {
				$sql .= makeLink( $link, $first, $db );
			}

			$sql .= ";\n";
		} else {
			# Find out which site this DB belongs to: the first family whose
			# suffix ends the database name; what precedes it is the language
			$site = false;
			$matches = array();
			foreach ( $sites as $candidateSite ) {
				$suffix = $candidateSite->suffix;
				if ( preg_match( "/(.*)$suffix$/", $db, $matches ) ) {
					$site = $candidateSite;
					break;
				}
			}
			if ( !$site ) {
				print "Invalid database $db\n";
				continue;
			}
			$lang = $matches[1];
			$host = "$lang." . $site->url;
			$sql .= "\n--$host\n\n";

			$sql .= "USE $db;\n" .
				"TRUNCATE TABLE interwiki;\n" .
				"INSERT INTO interwiki (iw_prefix,iw_url,iw_local) VALUES\n";
			$first = true;

			# Intermap links
			foreach ( $iwArray as $iwEntry ) {
				# Suppress links with the same name as the site
				# (on wikipedias that name is 'wikipedia', elsewhere it is the suffix)
				if ( ( $suffix == 'wiki' && $iwEntry['iw_prefix'] != 'wikipedia' ) ||
					( $suffix != 'wiki' && $suffix != $iwEntry['iw_prefix'] ) )
				{
					$sql .= makeLink( $iwEntry, $first, $db );
				}
			}

			# Lateral links
			foreach ( $sites as $targetSite ) {
				# Suppress link to self
				if ( $targetSite->suffix != $site->suffix ) {
					$sql .= makeLink( array( $targetSite->lateral, $targetSite->getURL( $lang ), 1 ), $first, $db );
				}
			}

			# Interlanguage links
			$sql .= makeLanguageLinks( $site, $first, $db );

			# w link within wikipedias
			# Other sites already have it as a lateral link
			if ( $site->suffix == "wiki" ) {
				$sql .= makeLink( array( "w", 'http://en.wikipedia.org/wiki/$1', 1 ), $first, $db );
			}

			# Extra links
			foreach ( $extraLinks as $link ) {
				$sql .= makeLink( $link, $first, $db );
			}
			$sql .= ";\n";
		}

		# Report a failed write instead of silently losing the file; keep
		# going so the remaining databases are still processed
		if ( file_put_contents( "$destDir/$db.sql", $sql ) === false ) {
			print "Unable to write $destDir/$db.sql\n";
		}
	}
}

# ------------------------------------------------------------------------------------------

# Returns part of an INSERT statement, corresponding to all interlanguage links to a particular site
# $site: the Site whose language editions are linked to
# $first: by-reference "no row emitted yet" flag, handed through to makeLink()
# $source: name of the database the links are generated for
function makeLanguageLinks( &$site, &$first, $source ) {
	global $langlist, $languageAliases;

	$sql = "";

	# Actual languages with their own databases
	foreach ( $langlist as $targetLang ) {
		$sql .= makeLink( array( $targetLang, $site->getURL( $targetLang ), 1 ), $first, $source );
	}

	# Language aliases point straight at the real language's wiki
	foreach ( $languageAliases as $alias => $lang ) {
		$sql .= makeLink( array( $alias, $site->getURL( $lang ), 1 ), $first, $source );
	}
	return $sql;
}

# Make SQL for a single link from an array
# $entry: the row to insert, either positional array( prefix, url, local ) or
#         keyed array( 'iw_prefix' => ..., 'iw_url' => ..., 'iw_local' => ... )
# $first: by-reference flag; true until the first row has been produced, after
#         which every row is preceded by ",\n"
# $source: name of the database the row is generated for, used to look up
#          per-database prefix rewrites in the global $prefixRewrites
# Returns the "(...)" value tuple, with a leading comma when it is not the first
function makeLink( $entry, &$first, $source ) {
	global $prefixRewrites;

	# The prefix lives under 'iw_prefix' in keyed entries and under index 0
	# in positional ones; reading index 0 unconditionally would raise an
	# undefined-offset notice for keyed entries and never rewrite them
	$prefixKey = array_key_exists( 'iw_prefix', $entry ) ? 'iw_prefix' : 0;
	if ( isset( $entry[$prefixKey] ) ) {
		$prefix = $entry[$prefixKey];
		if ( isset( $prefixRewrites[$source][$prefix] ) ) {
			$entry[$prefixKey] = $prefixRewrites[$source][$prefix];
		}
	}

	$sql = "";
	# Add comma
	if ( $first ) {
		$first = false;
	} else {
		$sql .= ",\n";
	}
	# NOTE(review): makeList() presumably quotes the values and joins them
	# with commas, ignoring the array keys - confirm against the Database class
	$dbr = wfGetDB( DB_SLAVE );
	$sql .= "(" . $dbr->makeList( $entry ) . ")";
	return $sql;
}