2009年7月29日星期三

比较全的PHP编码转换类(转)

转载:http://blog.163.com/wzd_0210/blog/static/24384962008215112636864/


因为对编码不熟悉,处理起一些编码问题来经常抓耳挠腮。最近在PHPChina中发现了一个高人写的类,可以完成以下转换:

Charset::convert(string input,string incharset,string outcharset)

Charset::unescape(string escaped,string outcharset)

Charset::escape(string string,string incharset)

Charset::jsondecode(string encoded,string outcharset)

Charset::jsonencode(mixed value,string incharset)

Charset::pinYin(string chinese,string incharset)

PHP代码


<?php   
/*
 * Character-set conversion.
 * Notes:
 * jsonencode borrows from Services_JSON but differs a lot; it can encode utf-8, gb2312 and big5 input.
 * jsondecode is the author's own algorithm, modelled on the way a directory is read;
 *   it can unescape the Chinese Unicode escapes found in JSON to gbk, big5 or utf8.
 */
define('TABLE_DIR','./table');
define('USEEXISTS',FALSE);// whether to use PHP's built-in encoding conversion functions when they exist
// (author's remark: the built-in conversion functions do not convert well enough)
class Charset{   
       
    private static $target_lang,$source_lang;   
    protected static $string = '';   
    protected static $table = NULL;  

PHP代码


/**
 * Convert a string from one supported character set to another.
 *
 * Non-empty charset names are lower-cased and the aliases 'gbk', 'utf8' and
 * 'big-5' are rewritten to 'gb2312', 'utf-8' and 'big5' before they are used.
 *
 * @param string $source       text to convert
 * @param string $source_lang  input charset: 'utf-8', 'gb2312' or 'big5'
 * @param string $target_lang  output charset: 'utf-8', 'gb2312' or 'big5'
 * @return string|null  the converted text; $source itself when it is empty,
 *                      when both charsets are equal or when no mapping table
 *                      could be loaded; NULL when the source charset is not
 *                      one of the three handled names
 */
static public function convert($source,$source_lang,$target_lang='utf-8'){
    $aliases = array('gbk','utf8','big-5');
    $canonical = array('gb2312','utf-8','big5');
    if($source_lang != ''){
        $source_lang = str_replace($aliases,$canonical,strtolower($source_lang));
    }
    if($target_lang != ''){
        $target_lang = str_replace($aliases,$canonical,strtolower($target_lang));
    }

    // Nothing to do for empty input or identical charsets.
    if($source == ''||$source_lang == $target_lang){
        return $source;
    }

    $index = $source_lang."_".$target_lang;

    // Simplified <-> traditional is a character mapping, not a re-encoding,
    // so the built-in converters are never used for that pair.
    if(USEEXISTS&&!in_array($index,array('gb2312_big5','big5_gb2312'))){
        if(function_exists('iconv')){
            return iconv($source_lang,$target_lang,$source);
        }
        if(function_exists('mb_convert_encoding')){
            return mb_convert_encoding($source,$target_lang,$source_lang);
        }
    }

    $table = self::loadtable($index);
    if(!$table){
        return $source;
    }

    // The worker methods read their input from these static properties.
    self::$string = $source;
    self::$source_lang = $source_lang;
    self::$target_lang = $target_lang;

    $fromDoubleByte = ($source_lang=='gb2312'||$source_lang=='big5');
    if($fromDoubleByte){
        if($target_lang=='utf-8'){
            self::$table = $table;
            return self::CHS2UTF8();
        }
        // The gbkbig5 table is used as loaded for gb2312 -> big5 and flipped
        // for the opposite direction.
        self::$table = ($target_lang=='gb2312') ? array_flip($table) : $table;
        return self::BIG2GB();
    }
    if(self::$source_lang=='utf-8'){
        self::$table = array_flip($table);
        return self::UTF82CHS();
    }
    return NULL;
}

PHP代码


/**
 * Counterpart of JavaScript's unescape() for \uXXXX and %uXXXX sequences.
 *
 * Every escape made of a backslash or percent sign, the letter "u" and four
 * hexadecimal digits is replaced by the character it denotes, encoded in the
 * requested charset. All other text is returned untouched.
 *
 * Side effects: stores the normalised charset in self::$target_lang and,
 * when a table-based conversion is needed, the flipped mapping table in
 * self::$table (both are read by descape()).
 *
 * @param string $str       source string
 * @param string $charset   charset of the result: 'utf-8', 'gb2312' or 'big5'
 *                          (aliases 'gbk', 'utf8' and 'big-5' are accepted)
 * @return string
 */
static public function unescape($str,$charset='utf-8'){
    self::$target_lang = str_replace(
        array('gbk','utf8','big-5'),
        array('gb2312','utf-8','big5'),
        strtolower($charset)
    );
    $useBuiltin = USEEXISTS&&(function_exists('mb_convert_encoding')||function_exists('iconv'));
    if(self::$target_lang!='utf-8'&&!$useBuiltin){
        // Look the table up under the NORMALISED name: with the raw alias
        // (e.g. 'gbk') loadtable() knows no such key and returns NULL.
        $table = self::loadtable('unescapeto'.self::$target_lang);
        // An unknown charset or a missing table file leaves an empty map, so
        // descape() falls back to numeric entities instead of failing.
        self::$table = is_array($table) ? array_flip($table) : array();
    }
    // Alternation instead of the former character class [\\|%], which also
    // accepted a literal "|"; only real hex digits are taken as the code.
    return preg_replace_callback('/(?:\\\\|%)u([0-9a-f]{4})/i',array(__CLASS__,'descape'),$str);
}

PHP代码


/**
 * Counterpart of JavaScript's escape(): non-ASCII characters become \uXXXX.
 *
 * For 'utf-8' input the 2- and 3-byte sequences are decoded arithmetically;
 * every other byte (ASCII, stray continuation bytes, 4-byte lead bytes) is
 * copied through unchanged. For the other charsets each byte above 0x80 is
 * read together with the following byte and looked up in the table returned
 * by loadtable($charset.'escape').
 *
 * A multibyte sequence cut off by the end of the string is copied through
 * as-is instead of being decoded from bytes that are not there.
 *
 * @param string $str       source string
 * @param string $charset   charset of $str: 'utf-8', 'gb2312' or 'big5'
 *                          (aliases 'gbk', 'utf8' and 'big-5' are accepted)
 * @return string
 */
static public function escape($str,$charset='utf-8'){
    $escaped = '';
    $charset = str_replace(
        array('gbk','big-5','utf8'),
        array('gb2312','big5','utf-8'),
        strtolower($charset)
    );
    $ulen = strlen($str);

    if($charset!='utf-8'){
        $table = self::loadtable($charset.'escape');
        if(!is_array($table)){
            // Unknown charset or missing table file: nothing can be mapped.
            $table = array();
        }
        for($i=0;$i<$ulen;$i++){
            $c = $str[$i];
            if(ord($c)>0x80&&$i+1<$ulen){
                // bin2hex() returns a string, hence the hexdec() round trip.
                $code = hexdec(bin2hex($c.$str[$i+1]));
                $i += 1;
                // Unmapped pairs keep the historical output \u0000.
                $escaped .= sprintf('\u%04X',isset($table[$code]) ? $table[$code] : 0);
            }else{
                $escaped .= $c;
            }
        }
        return $escaped;
    }

    for($i=0;$i<$ulen;$i++){
        $c = $str[$i];
        $char = ord($c);
        $lead = $char>>4;// high nibble identifies the sequence length
        if(($lead==12||$lead==13)&&$i+1<$ulen){
            // 110xxxxx 10xxxxxx
            $char = (($char&0x1F)<<6)|(ord($str[$i+1])&0x3F);
            $i += 1;
            $escaped .= sprintf('\u%04X',$char);
        }elseif($lead==14&&$i+2<$ulen){
            // 1110xxxx 10xxxxxx 10xxxxxx
            $char = (($char&0x0F)<<12)|((ord($str[$i+1])&0x3F)<<6)|(ord($str[$i+2])&0x3F);
            $i += 2;
            $escaped .= sprintf('\u%04X',$char);
        }else{
            $escaped .= $c;
        }
    }
    return $escaped;
}

PHP代码


/**
 * Decode a JSON text.
 *
 * Whitespace between tokens is removed first; whitespace inside quoted
 * string literals is preserved. The first character then selects the parser:
 * "{" and "[" go to parseArray(), a double quote to string_find(), and
 * anything not starting with t, f or n is read as a number. A text starting
 * with t, f or n yields TRUE, FALSE or NULL without the remaining characters
 * being checked.
 *
 * Side effect: stores $charset in self::$target_lang for the string decoder.
 *
 * @param string $encoded   JSON source text
 * @param string $charset   charset for decoded strings: 'utf-8', 'gb2312' or 'big5'
 * @return string|array|boolean|int|float|null
 */
static public function jsondecode($encoded,$charset='utf-8'){
    // Eat whitespace. The first alternative matches a complete string
    // literal (including escaped quotes) and puts it back unchanged through
    // $1, so only whitespace outside of strings is dropped.
    $encoded = preg_replace(
        '/("(?:[^"\\\\]++|\\\\.)*+")|[\t\b\f\n\r ]+/s',
        '$1',
        $encoded
    );
    self::$target_lang = $charset;
    $c = self::cursor($encoded);
    switch($c){
        case '{':return self::parseArray($encoded);
        case '[':return self::parseArray($encoded,FALSE);
        case '"':return self::string_find($encoded);
        case 't':return TRUE;
        case 'f':return FALSE;
        case 'n':return NULL;
        default:return self::num_read($c.$encoded);
    }
}

PHP代码


/**
 * Encode a PHP value as JSON.
 *
 * Strings get their backslashes, double quotes and the control characters
 * CR, LF, TAB, backspace and form feed escaped, and are then passed through
 * escape() so that non-ASCII characters become \uXXXX. Arrays and the public
 * properties of objects are handled by encodearray(). Integers and floats
 * are returned as numbers, not as strings.
 *
 * Side effect: a non-NULL $charset is remembered in self::$source_lang and
 * reused by later calls that pass NULL.
 *
 * @param mixed  $var       value to encode
 * @param string $charset   charset of the strings inside $var: 'utf-8',
 *                          'gb2312' or 'big5'; NULL reuses the last charset
 *                          and falls back to 'utf-8'
 * @return string|int|float
 */
static public function jsonencode($var,$charset=NULL){
    if(is_null($charset)){
        $charset = self::$source_lang;
    }else{
        self::$source_lang = $charset;
    }
    if(!$charset){
        $charset = 'utf-8';
    }
    switch (gettype($var)){
        case 'boolean':
            return $var ? 'true' : 'false';
        case 'NULL':
            return 'null';
        case 'integer':
            return (int) $var;
        case 'double':
        case 'float':
            return (float) $var;
        case 'string':
            // The former map also had the key "\b". PHP has no \b escape, so
            // that key was the two characters backslash + b and, being the
            // longest match, let a literal backslash-b through unescaped
            // (it then decoded as a backspace). Backspace is "\x08" only.
            $var = strtr($var,array(
                '\\' => '\\\\','"' => '\"',
                "\r" => '\\r',"\n" => '\\n',"\t" => '\\t',
                "\x08" => '\\b',"\x0c" => '\\f'
            ));
            $var = self::escape($var,$charset);
            return '"'.$var.'"';
        case 'array':
            return self::encodearray($var);
        case 'object':
            $var = get_object_vars($var);
            return self::encodearray($var);
        default:return 'null';
    }
}

PHP代码


    /**  
     * 汉字拼音  
     *  
     * @param string $str  
     * @param string $charset     输入编码 'utf-8' or 'gb2312' or 'big5'  
     * @return string  
     */  
    static public function PinYin($str,$charset='utf-8'){   
        if($charset!='gb2312'){   
            $str = self::convert($str,$charset,'gb2312');   
        }   
        self::$table = include(TABLE_DIR.'./pinyin.php');   
        $gblen = strlen($str);   
        $pin = '';   
        for($i=0;$i<$gblen;$i++){   
            $c = ord($str[$i]);   
            if($c > 0x00A0){   
                $index = 0x10000-($c*0x0100 + ord($str[++$i]));   
                $pin .= self::getPinYin($index);   
            }else{   
                $pin .= $str[$i];   
            }   
        }   
        return trim($pin);   
    }   
    static protected function getPinYin($index){   
        if($index==0x1534) return 'yan';   
        if($index>0x4F5F||$index<0x2807){   
            return '';   
        }   
        if(!self::$table){   
            return '';   
        }   
        while(true){   
            if(!isset(self::$table[$index])){   
                $index += 1;   
                if($index > 0x4F5F){   
                    return '';   
                }   
                continue;   
            }else{   
                return self::$table[$index];   
            }   
        }   
        return '';   
    }   
    static protected function loadtable($index){   
        static $table = array();   
        $tabIndex = '';   
        switch ($index) {   
            case 'gb2312_utf-8':   
            case 'utf-8_gb2312':   
            case 'gb2312escape':   
            case 'unescapetogb2312':   
                $tabIndex = 'gbkutf';   
                break;   
            case 'big5_utf-8':   
            case 'utf-8_big5':   
            case 'big5escape':   
            case 'unescapetobig5':   
                $tabIndex = 'big5utf';   
                break;   
            case 'gb2312_big5':   
            case 'big5_gb2312':   
                $tabIndex = 'gbkbig5';   
                break;   
            default:return NULL;   
        }   
        if(!isset($table[$tabIndex])){   
            $table[$tabIndex] = @include(TABLE_DIR."/".$tabIndex.".php");   
        }   
        return $table[$tabIndex];   
    }   
    static protected function descape($str){   
        $dec = hexdec($str[1]);   
        $str = self::u2utf8($dec);   
        if(self::$target_lang == 'utf-8'){   
            return $str;   
        }   
        if(USEEXISTS){   
            if(function_exists('iconv')){   
                return iconv('utf-8',self::$target_lang,$str);   
            }   
            if(function_exists('mb_convert_encoding')){   
                return mb_convert_encoding($str,self::$target_lang,'utf-8');   
            }   
        }   
        if(isset(self::$table[$dec])){   
            return self::hex2bin(dechex(self::$table[$dec]));   
        }else{   
            return "&#".$dec.";";   
        }   
    }   
    static protected function parseArray($str,$index=TRUE){   
        $result = array();   
        $fp = self::array_open($index,$str);//模拟打开目录   
        while($fp){   
            $type = '';   
            $key = '';   
            $value = self::array_read($fp,$type,$index,$key);//模拟读取目录   
            if($type=='{'){   
                if($index){   
                    $result[$key] = self::parseArray($fp);//递归   
                }else{   
                    $result[] = self::parseArray($fp);   
                }   
            }elseif($type=='['){   
                if($index){   
                    $result[$key] = self::parseArray($fp,FALSE);   
                }else{   
                    $result[] = self::parseArray($fp,FALSE);   
                }   
            }else{   
                if($index){   
                    $result[$key] = $value;   
                }else{   
                    $result[] = $value;   
                }   
            }   
        }   
        return $result;   
    }   
    static protected function array_open($index=TRUE,$string){   
        if($index){   
            $end = '}';   
            $new = '{';   
        }else{   
            $end = ']';   
            $new = '[';   
        }   
        $endpos = self::getpos($string,$end);   
        //用getpos获得$endpos 因为要判断{,},[,]是不是在字符串里面   
        $newpos = self::getpos($string,$new);   
        $fp = '';   
        if($endpos===FALSE){   
            return 'null';   
        }elseif($newpos===FALSE||$newpos>$endpos){   
            $fp = substr($string,0,$endpos);   
            $string = substr($string,$endpos+1);   
            return $fp;   
        }else{// 条件'if($newpos<$endpos)'可以不要了 找到与自己匹对结束符   
            $i = 1;   
            while($i){   
                $endpos = self::getpos($string,$end,$endpos+1);   
                $newpos = self::getpos($string,$new,$endpos+1);   
                if($endpos===FALSE){   
                    return 'null';   
                }elseif($newpos===FALSE){   
                    $i-=1;   
                    continue;   
                }elseif($newpos<$endpos){   
                    $i+=1;   
                    continue;   
                }else{   
                    continue;   
                }   
            }   
            $fp = substr($string,0,$endpos);   
            $string = substr($string,$endpos+1);   
            return $fp;   
        }   
    }   
    static protected function getpos($string,$sign,$offset=0){   
        /**  
         * 判断是否在字符串里面原理:  
         * 取得$offset到$pos($sign)位置之间字符串中'"'个数  
         * 如果为奇数说明在字符串里面 否则在字符串外面  
         */  
        $pos = strpos($string,$sign,$offset);   
        if($pos===FALSE){   
            return FALSE;   
        }   
        $str = substr($string,$offset,$pos-$offset);   
        $arr = array();   
        preg_match_all('/"/',str_replace('\"','',$str),$arr);   
        $in = count($arr[0])%2;   
        if(!$in){   
            return $pos;   
        }   
        do{   
            $next = strpos($string,$sign,$pos+1);   
            if($next===FALSE){   
                return FALSE;   
            }   
            $str = substr($string,$pos,$next-$pos);   
            $arr = array();   
            preg_match_all('/"/',str_replace('\"','',$str),$arr);   
            $in = !(count($arr[0])%2);   
            $pos = $next;   
        }while($in);   
        return $pos;   
    }   
    static protected function array_read($fp,$type,$index=TRUE,$key=null){   
        if($fp[0]==','){   
            self::cursor($fp);//跳过','   
        }   
        if($index){//有索引的数组    
            self::cursor($fp);//跳过 '"'合法   
            $key = self::string_find($fp);//读取索引值   
            self::cursor($fp);//跳过':'   
        }   
        $c = self::cursor($fp);   
        switch($c){   
            case '{':   
                $type='{';   
                return NULL;   
            case '[':   
                $type='[';   
                return NULL;   
            case '"':   
                $rs = self::string_find($fp);   
                $s = self::cursor($fp);//跳过','or '}' or ']' 要求合法   
                if(!($s==','||$s==null)){   
                    die('parse error1!');   
                }   
                return $rs;   
            case 't':   
                if(self::cursor($fp,3)=='rue'){//跳过'rue'   
                    $s = self::cursor($fp);//跳过','or '}' or ']' 要求合法   
                    if(!($s==','||$s==null)){   
                        die("parse error$s!");  
                    }  
                    return TRUE;  
                }else{  
                    die('parse error3!');  
                }  
            case 'f':  
                if(self::cursor($fp,4)=='alse'){  
                    $s = self::cursor($fp);//跳过','or '}' or ']' 要求合法  
                    if(!($s==','||$s==null)){  
                        die('parse error4!');  
                    }  
                    return FALSE;  
                }else{  
                    die('parse error5!');  
                }  
            case 'n':  
                if(self::cursor($fp,3)=='ull'){//跳过'ull'  
                    $s = self::cursor($fp);//跳过','or '}' or ']' 要求合法  
                    if(!($s==','||$s==null)){  
                        die('parse error6!');  
                    }  
                    return NULL;  
                }  
            default:  
                $pos = strpos($fp,',');  
                if($pos===FALSE){  
                    $num = substr($fp,0);  
                    $fp = '';  
                }else{  
                    $num = substr($fp,0,$pos);  
                    $fp = substr($fp,$pos+1);  
                }  
                return self::num_read($c.$num);  
        }  
    }  
    static protected function string_find(&$str){  
        $end = strpos($str,'"',0);   
        while($str[$end-1]=='\\'){  
            $end = strpos($str,'"',$end+1);  
            if($end===FALSE){  
                return 'null';  
            }  
        }  
        $escaped = strtr(  
            rtrim(self::cursor($str,$end+1),'"'),  
            array('\\"' => '"','\\\\'=> '\\','\\/'=> '/','\\b' => chr(8),  
                '\\f'=>chr(12),'\\n'=>chr(10),'\\r'=> chr(13),  
                '\\t'=>chr(9),'\\u'=>'%u'  
            )  
        );  
        return self::unescape($escaped,self::$target_lang);  
    }  
    static protected function num_read($str){  
        $matches = array();  
        if (preg_match('/-?([0-9])*(\.[0-9]*)?((e|E)((-|\+)?)[0-9]+)?/s',$str,$matches)){  
            $num = $matches[0];  
            $val   = intval($num);  
            $fval  = floatval($num);  
            $value = $val?$val:$fval;  
            return $value;  
        }else{  
            return NULL;  
        }  
    }  
    static protected function cursor(&$str,$shift=1){  
        $get = substr($str,0,$shift);  
        $str = substr($str,$shift);  
        return $get;  
    }  
    static protected function encodearray($array){  
        if(!$array){  
            return 'null';  
        }  
        if((array_keys($array)!==range(0,sizeof($array)- 1))){  
            $rs = '';  
            foreach($array as $key=>$value){  
                $rs .= ','.self::jsonencode(strval($key)).':';  
                if(is_array($value)){  
                    $rs .= self::encodearray($value);  
                }else{  
                    $rs .= self::jsonencode($value);  
                }  
            }  
            $rs = '{'.ltrim($rs,',').'}';  
            return $rs;  
        }else{  
            $rs = '';  
            foreach($array as $value){  
                if(is_array($value)){  
                    $rs .= ','.self::encodearray($value);  
                }else{  
                    $rs .= ','.self::jsonencode($value);  
                }  
            }  
            $rs = '['.ltrim($rs,',').']';  
            return $rs;  
        }  
    }  
    static protected function CHS2UTF8(){  
        $utf8 = "";  
        while(self::$string){  
            if (ord(self::$string[0]) > 0x80){  
                $bin = substr(self::$string,0,2);  
                $utf8 .= self::u2utf8(self::$table[hexdec(bin2hex($bin))]);  
                self::$string = substr(self::$string,2);  
            }else{  
                $utf8 .= self::$string[0];  
                self::$string = substr(self::$string,1);  
            }  
        }  
        return $utf8;  
    }  
    static protected function UTF82CHS(){  
        $chs  = "";  
        $ulen = strlen(self::$string);  
        for($i=0;$i<$ulen;$i++){  
            $c = self::$string[$i];  
            $char = ord($c);  
            switch ($char>>4){  
                case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:  
                    $chs .= $c;  
                    break;  
                case 12: case 13:  
                    $char = self::$table[(($char&0x1F)<<6)|(ord(self::$string[++$i])&0x3F)];  
                    $chs .= self::hex2bin(dechex($char));  
                    break;  
                case 14:  
                    $char = self::$table[  
                        (($char&0x0F)<<12)  
                        |((ord(self::$string[++$i])&0x3F)<<6)  
                        |(ord(self::$string[++$i])& 0x3F)  
                    ];  
                    $chs .= self::hex2bin(dechex($char));  
                    break;  
                default:$chs .= $c;break;  
            }  
        }  
        /*for($i=0;$i<$ulen;$i++){  
            $c = self::$string[$i];  
            $cb = decbin(ord($c));  
            if(strlen($cb)==8){  
                $csize = strpos(decbin(ord($cb)),"0");  
                for($j=0;$j < $csize;$j++){  
                    $i++;  
                    $c .= self::$string[$i];  
                }  
                $c = self::utf82u($c);  
                if(isset(self::$table[$c])){  
                    $chs .= self::hex2bin(dechex(self::$table[$c]));  
                }else{   
                    $chs .= "&#".$c.";";  
                }  
            }else{  
                $chs .= $c;  
            }  
        }*/  
        return trim($chs);  
    }  
    static protected function BIG2GB(){  
        $ret = '';  
        while(self::$string){  
            if(ord(self::$string[0]) > 0x80){  
                $index = hexdec(bin2hex(self::$string[0].self::$string[1]));  
                $value = self::$table[$index];  
                $ret .= self::hex2bin(dechex($value));  
                self::$string = substr(self::$string,2);  
            }else{  
                $ret .= self::$string[0];  
                self::$string = substr(self::$string,1);  
            }  
        }  
        return $ret;  
    }  
    static protected function u2utf8($c){  
        $str = '';  
        if ($c < 0x80){  
            $str.= chr($c);  
        }elseif($c < 0x800){  
            $str.= chr(0xC0 | $c>>6);  
            $str.= chr(0x80 | $c & 0x3F);  
        }elseif($c < 0x10000){  
            $str.= chr(0xE0 | $c>>12);  
            $str.= chr(0x80 | $c>>6 & 0x3F);  
            $str.= chr(0x80 | $c & 0x3F);  
        }elseif($c < 0x200000){  
            $str.= chr(0xF0 | $c>>18);  
            $str.= chr(0x80 | $c>>12 & 0x3F);  
            $str.= chr(0x80 | $c>>6 & 0x3F);  
            $str.= chr(0x80 | $c & 0x3F);  
        }  
        return $str;  
    }  
    static protected function utf82u($c){  
        switch(strlen($c)) {  
            case 1:  
                return ord($c);  
                break;  
            case 2:  
                $n  = (ord($c[0]) & 0x3f) << 6;  
                $n += ord($c[1]) & 0x3f;  
                return $n;  
                break;  
            case 3:  
                $n  = (ord($c[0]) & 0x1f) << 12;  
                $n += (ord($c[1]) & 0x3f) << 6;  
                $n += ord($c[2]) & 0x3f;  
                return $n;  
                break;  
            case 4:  
                $n  = (ord($c[0]) & 0x0f) << 18;  
                $n += (ord($c[1]) & 0x3f) << 12;  
                $n += (ord($c[2]) & 0x3f) << 6;  
                $n += ord($c[3]) & 0x3f;  
                return $n;  
                break;  
            default:return '';break;  
        }  
    }  
    static protected function hex2bin($hexdata){  
        $bindata = '';   
        for ($i = 0, $count = strlen($hexdata); $i < $count; $i += 2){   
            $bindata .= chr(hexdec($hexdata[$i].$hexdata[$i + 1]));   
        }   
        return $bindata;   
    }   
}  

没有评论: