<?php
# Copyright (C) 2004 Brion Vibber <brion@pobox.com>
# http://www.mediawiki.org/
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# http://www.gnu.org/copyleft/gpl.html

require_once dirname(__FILE__).'/UtfNormalDefines.php';

/**
 * Encode a single Unicode code point as a UTF-8 byte string.
 *
 * Code points outside 0 .. 0x10FFFF cannot be encoded: an error message is
 * echoed and the script terminates via die( -1 ). Negative values take this
 * path too, rather than being handed to chr() and producing a stray byte.
 * Surrogate code points (0xD800 .. 0xDFFF) are not rejected.
 *
 * @param int $codepoint Code point to encode
 * @return string The 1 to 4 byte UTF-8 sequence for $codepoint
 */
function codepointToUtf8( $codepoint ) {
	if ( $codepoint < 0 || $codepoint >= 0x110000 ) {
		echo "Asked for code outside of range ($codepoint)\n";
		die( -1 );
	}

	# One byte: 0xxxxxxx
	if ( $codepoint < 0x80 ) {
		return chr( $codepoint );
	}

	# Two bytes: 110xxxxx 10xxxxxx
	if ( $codepoint < 0x800 ) {
		return chr( ( ( $codepoint >> 6 ) & 0x3f ) | 0xc0 ) .
			chr( ( $codepoint & 0x3f ) | 0x80 );
	}

	# Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
	if ( $codepoint < 0x10000 ) {
		return chr( ( ( $codepoint >> 12 ) & 0x0f ) | 0xe0 ) .
			chr( ( ( $codepoint >> 6 ) & 0x3f ) | 0x80 ) .
			chr( ( $codepoint & 0x3f ) | 0x80 );
	}

	# Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	return chr( ( ( $codepoint >> 18 ) & 0x07 ) | 0xf0 ) .
		chr( ( ( $codepoint >> 12 ) & 0x3f ) | 0x80 ) .
		chr( ( ( $codepoint >> 6 ) & 0x3f ) | 0x80 ) .
		chr( ( $codepoint & 0x3f ) | 0x80 );
}

/**
 * Convert a whitespace-separated list of hexadecimal code points
 * (for example "0041 00e9") into the corresponding UTF-8 string.
 *
 * Empty tokens are skipped, so an empty sequence yields an empty string and
 * doubled or trailing separators do not inject NUL characters.
 *
 * @param string $sequence Hexadecimal code points separated by whitespace
 * @return string UTF-8 encoding of the listed code points, in order
 */
function hexSequenceToUtf8( $sequence ) {
	$utf = '';
	$tokens = preg_split( '/\s+/', $sequence, -1, PREG_SPLIT_NO_EMPTY );
	if ( $tokens === false ) {
		return $utf;
	}
	foreach ( $tokens as $hex ) {
		$utf .= codepointToUtf8( hexdec( $hex ) );
	}
	return $utf;
}

/**
 * Convert a UTF-8 string into a space-separated list of its code points,
 * each printed as lower-case hexadecimal padded to at least four digits.
 *
 * The string is split with a /us pattern, so every character, newlines
 * included, is converted. If the string is empty, or PCRE cannot process it
 * as UTF-8, an empty string is returned.
 *
 * @param string $str UTF-8 text
 * @return string For example "0041 00e9" for "A" followed by U+00E9
 */
function utf8ToHexSequence( $str ) {
	# preg_match_all() yields 0 for no matches and false on failure
	# (e.g. malformed UTF-8 under /u); both give an empty result.
	if ( !preg_match_all( '/./us', $str, $matches ) ) {
		return '';
	}

	$hex = array();
	foreach ( $matches[0] as $char ) {
		$hex[] = sprintf( '%04x', utf8ToCodepoint( $char ) );
	}
	return implode( ' ', $hex );
}

/**
 * Decode a string holding exactly one UTF-8 encoded character into its
 * code point.
 *
 * The expected byte length is taken from the number of leading 1 bits in the
 * first byte and compared with the actual string length. The continuation
 * bytes themselves are not validated.
 *
 * @param string $char One UTF-8 encoded character
 * @return int|false The code point, or false if $char is empty or its byte
 *   length does not match the length announced by its first byte
 */
function utf8ToCodepoint( $char ) {
	$byteCount = strlen( $char );
	if ( $byteCount === 0 ) {
		# Nothing to decode; also avoids reading offset 0 of an empty string
		return false;
	}

	# Find the length: count the leading 1 bits of the first byte
	$z = ord( $char[0] );
	if ( $z & 0x80 ) {
		$length = 0;
		while ( $z & 0x80 ) {
			$length++;
			$z <<= 1;
		}
	} else {
		$length = 1;
	}

	if ( $length != $byteCount ) {
		return false;
	}
	if ( $length == 1 ) {
		return ord( $char );
	}

	# Mask off the length-determining bits and shift back to the original location
	$z &= 0xff;
	$z >>= $length;

	# Add in the free bits (low six) from each subsequent byte
	for ( $i = 1; $i < $length; $i++ ) {
		$z <<= 6;
		$z |= ord( $char[$i] ) & 0x3f;
	}

	return $z;
}

/**
 * Backslash-escape every backslash and single quote in a string.
 *
 * These are the two characters that need escaping inside a single-quoted
 * PHP string literal.
 *
 * @param string $string Raw text
 * @return string $string with \ replaced by \\ and ' replaced by \'
 */
function escapeSingleString( $string ) {
	return strtr( $string,
		array(
			'\\' => '\\\\',
			'\'' => '\\\''
		) );
}