网络营销电子商务研究中心

网络营销电子商务研究中心 (https://www.0058.net/index.php)
-   代码交流 (https://www.0058.net/forumdisplay.php?f=16)
-   -   文件批量转换器(从ansi 到 Utf-8) --转 (https://www.0058.net/showthread.php?t=3168)

topvip 2008-03-31 11:28 AM

文件批量转换器(从ansi 到 Utf-8) --转
 
文件批量转换器(从ANSI 到 UTF-8)(PHP)
其中几个函数很好用,我花了一天时间才写好,分别介绍如下:
直接创建多层目录mymkdir($dirname),
批处理目录下的文件:function listDirTree( $dirName = null ,$callFunction),可自定义处理函数(在下面完整的类代码中,listDirTree 只接收 $dirName,处理函数通过 $this->callFunc 指定)。
如:
PHP 代码:
Code:

$callFunc="backupFile(\"$backupPath\",\$file,\$filePath,\$dirName);";
listDirTree($operateDir,$callFunc);

将文本中 x=y
u=a
提取为数组的函数
function myExplode($originalStr,$firstTag,$secondTag)
将文件转换为utf-8编码的函数。function gb2utf8($file,$filePath)
判断文件是否为utf-8编码的函数 function utf8_probability(&$rawtextstr)



PHP 代码:
Code:

<?php
//header('Content-Type: text/html; charset=utf-8');
// Driver script: reads the posted form, configures a fileconv instance and runs it.
//
// SECURITY NOTE(review): this page lets whoever can reach it rewrite arbitrary
// files that the web server user may write to. Keep it off public servers or
// put it behind authentication.

// Missing keys (e.g. the first GET request that only shows the form) default to "".
// Both doubled backslashes (old magic-quotes input) and single backslashes are
// normalised so Windows paths work regardless of the PHP configuration.
$fname      = isset($_POST['fname']) ? str_replace(array("\\\\", "\\"), "/", $_POST['fname']) : "";
$encode     = isset($_POST['encode']) ? $_POST['encode'] : "";
$destEncode = isset($_POST['destencode']) ? $_POST['destencode'] : "";
$changeType = isset($_POST['changetype']) ? $_POST['changetype'] : "";

// In "dir" mode a file path means "convert the directory containing this file".
// A path that already is a directory is used as-is instead of jumping to its parent.
if ($changeType == "dir" && $fname !== "" && !is_dir($fname)) {
    $fname = dirname($fname);
}

// Directory shown to the user: the directory itself, or the directory of the single file.
$targetDir = is_dir($fname) ? $fname : dirname($fname);

// User input is escaped before being echoed. ISO-8859-1 makes htmlspecialchars()
// work byte-wise, so paths in GBK/Big5 are not rejected as invalid UTF-8.
echo "需要转换的文件目录:".htmlspecialchars($targetDir, ENT_QUOTES, 'ISO-8859-1') ."<BR>";
echo "已有的文件编码格式:".htmlspecialchars($encode, ENT_QUOTES, 'ISO-8859-1');
echo "<PRE>";
if ($fname) {
    $fc = new fileconv();
    $fc->changeType = "file";
    if (is_dir($fname)) {
        $fc->changeType    = "dir";
        $fc->operateDir    = $fname;
        $fc->operateDirLen = strlen($fc->operateDir);
    } else if (is_file($fname)) {
        $fc->operateFile   = $fname;
        $fc->operateDirLen = strlen($fc->operateFile);
    } else {
        echo "fatal error";
        die;
    }

    $fc->backupPath   = "d:/usr/www/html/bkup_test/";
    // PHP snippet evaluated for every file found while walking the directory.
    $fc->mainCallFunc = "\$this->gb2utf8(\$file,\$filePath);";
    if ($encode) {
        $fc->encode = $encode;
    }
    // Honour the target encoding chosen in the form (defaults to utf-8 in the class).
    if ($destEncode) {
        $fc->destEncode = $destEncode;
    }
    $fc->mainsub();
}
/**
 * Batch text-file encoding converter.
 *
 * Converts a single file or every file of a directory tree from $encode to
 * $destEncode with iconv(), after copying the originals to $backupPath.
 * Files that already look like UTF-8 and files whose extension is not listed
 * in $extname are left untouched and only counted.
 */
class fileconv{
    var $currentName;    // File name of the running script; used by currentpath().
    var $changeType;     // "dir" converts a directory tree, "file" converts a single file.
    var $operateFile;    // The single file to convert when $changeType is "file".
    var $encode;         // Source encoding handed to iconv().
    /**
    * destEncode is the encode type you want to convert the text file to .
    * *access public
    */
    var $destEncode;
    var $extname;        // ";"-separated list of extensions treated as text files.
    var $operateDir;     // Directory to convert when $changeType is "dir".
    var $operateDirLen;  // Length of the path prefix stripped to obtain the backup sub directory.
    var $backupPath;     // Backup directory (must end with "/"); created when missing.
    var $callFunc;       // PHP snippet evaluated by listDirTree() for every file.
    var $mainCallFunc;   // PHP snippet used as $callFunc for the conversion pass.
    var $utf8File = 0;       // Number of files skipped because they already look like UTF-8.
    var $convertedFile = 0;  // Number of files converted.
    var $notTextFile = 0;    // Number of files skipped because of their extension.

    /**
     * Sets the defaults: text extensions, gb2312 -> utf-8, zeroed counters.
     */
    function __construct()
    {
        $this->currentName   = "fileconv.php";

        // Extensions of the files that may be converted.
        $this->extname       = ".php;.xml;html;.htm;.css;.txt;.js;.ini;.sql;";
        $this->encode        = "gb2312";
        $this->destEncode    = "utf-8";
        // Result counters.
        $this->utf8File      = 0;
        $this->convertedFile = 0;
        $this->notTextFile   = 0;
    }

    /**
     * Old-style constructor name, kept so existing explicit calls keep working.
     * PHP 8 no longer treats it as a constructor, hence __construct() above.
     */
    function fileconv()
    {
        $this->__construct();
    }

    /**
     * Entry point: backs up and converts according to $changeType, then
     * prints the counters.
     */
    function mainsub()
    {
        if ($this->changeType == "dir") {
            // The backup directory must not live inside the tree being converted.
            if (strpos($this->backupPath, $this->operateDir) === false) {
                clearstatcache();
                if (!isset($this->operateDirLen)) {
                    $this->operateDirLen = strlen($this->operateDir);
                }
                if (!is_dir($this->backupPath)) {
                    $this->mymkdir($this->backupPath);
                }
                $this->backup();
                // Fall back to the built-in converter when no snippet was configured.
                $this->callFunc = $this->mainCallFunc
                    ? $this->mainCallFunc
                    : "\$this->gb2utf8(\$file,\$filePath);";
                $this->listDirTree($this->operateDir);
            } else {
                echo "Fetal Error You must change your backupdir ,<BR>It can't be under the directory you want to backup";
                die;
            }
        } else if ($this->changeType == "file") {
            // basename()/dirname() also cope with a path that has no "/" at all.
            $file     = basename($this->operateFile);
            $filepath = dirname($this->operateFile);
            if (!isset($this->operateDirLen)) {
                $this->operateDirLen = strlen($this->operateFile);
            }
            // Single files are always backed up next to the script, in testbk/.
            $this->backupPath = $this->currentpath()."/testbk/";
            $this->backupFile($file, $this->operateFile, $filepath);
            $this->gb2utf8($file, $this->operateFile);
        }
        $this->echoResult();
    }

    /**
     * Copies every file below $operateDir to $backupPath, once.
     *
     * NOTE(review): the "already backed up" flag is stored per backup
     * directory, not per source directory, so a second run on a different
     * source with the same $backupPath converts without a fresh backup.
     */
    function backup()
    {
        if ($this->readtag($this->backupPath) == 0) {
            // No backup yet: copy every file, then set the marker.
            $this->callFunc = "\$this->backupFile(\$file,\$filePath,\$dirName);";
            $this->listDirTree($this->operateDir);
            $this->setTag($this->backupPath);
        } else {
            echo "backuped already";
        }
    }

    /**
     * Copies one file into the backup directory, keeping its sub directory.
     *
     * @param string $file     File name without path.
     * @param string $filePath Full path of the file to copy.
     * @param string $dirName  Directory containing the file.
     */
    function backupFile($file, $filePath, $dirName)
    {
        // Part of $dirName below the operated directory ("" for the top level).
        $subdir = (string)substr($dirName."/", $this->operateDirLen + 1);
        $this->mymkdir($this->backupPath.$subdir);
        $bkfile = $this->backupPath.$subdir.$file;
        echo "子目录:$subdir 备份文件为: $bkfile 原始文件为:$filePath<BR>";
        // The originals are overwritten afterwards, so a failed copy must stop the run
        // instead of silently leaving an empty backup.
        if (!copy($filePath, $bkfile)) {
            echo "Fatal Error while backing up $filePath";
            die;
        }
    }

    /**
     * Prints the counters collected by gb2utf8().
     */
    function echoResult()
    {
        echo "<BR>utf8文件:".$this->utf8File;
        echo "<BR>被转换的文件总数:".$this->convertedFile;
        echo "<BR>非文本文件数目:".$this->notTextFile;
    }

    /**
     * Reads the "backuped" flag from tag.ini in the backup directory,
     * creating the directory and the file (flag 0) when they do not exist.
     *
     * @param string $backupDir Backup directory, ending with "/".
     * @return int 0 when no backup was made yet, otherwise the stored flag.
     */
    function readtag($backupDir)
    {
        if (!is_dir($backupDir)) {
            $this->mymkdir($backupDir);
        }
        $tagFile = $backupDir."tag.ini";
        if (!is_file($tagFile)) {
            // The tag file is plain ASCII, so it is written as-is and never
            // sent through the converter (which would also distort the counters).
            if (!$fp = fopen($tagFile, "w")) {
                echo "error while reading or creating the tag.ini ";
            } else {
                if (fwrite($fp, "backuped=0") === false) {
                    echo "error while init the tag.ini ";
                }
                fclose($fp);
            }
            return 0;
        }
        $backupTag = file_get_contents($tagFile);
        if ($backupTag === false) {
            return 0;
        }
        $tagTree = $this->myExplode($backupTag, "/\r?\n/", "=");
        return isset($tagTree['backuped']) ? (int)$tagTree['backuped'] : 0;
    }

    /**
     * Stores "backuped=1" in tag.ini of the backup directory.
     *
     * @param string $backupDir Backup directory, ending with "/".
     */
    function setTag($backupDir)
    {
        $tagFile = $backupDir."tag.ini";
        $tagini  = is_file($tagFile) ? file_get_contents($tagFile) : "";
        if ($tagini === false) {
            $tagini = "";
        }
        $str = "backuped=1";
        // strpos() returns 0 when the key starts the file, so compare with false.
        if (strpos($tagini, "backuped=") === false) {
            // Key missing: append it and keep whatever else the file holds.
            $tagini = ($tagini === "" ? "" : rtrim($tagini)."\n").$str;
        } else {
            $tagini = preg_replace("/backuped=0/", $str, $tagini);
        }
        if (!$fp = fopen($tagFile, "w")) {
            echo "error while reading or creating the tag.ini ";
        } else {
            if (fwrite($fp, $tagini) === false) {
                echo "error while set the tag.ini ";
            }
            fclose($fp);
        }
    }

    /**
     * Turns text such as "x=y\nu=a" into array('x' => 'y', 'u' => 'a').
     *
     * @param string $originalStr Text to parse.
     * @param string $firstTag    Regular expression separating the entries.
     * @param string $secondTag   Literal string separating key and value.
     * @return array Key/value pairs; entries without $secondTag are skipped.
     */
    function myExplode($originalStr, $firstTag, $secondTag)
    {
        $tagTree = array();
        if ((string)$secondTag === "") {
            return $tagTree;
        }
        $firstArray = preg_split($firstTag, $originalStr);
        if ($firstArray === false) {
            return $tagTree;
        }
        foreach ($firstArray as $first) {
            // Limit 2 keeps values that contain the separator themselves.
            $secondArray = explode($secondTag, $first, 2);
            if (count($secondArray) < 2) {
                continue;
            }
            $tagTree[$secondArray[0]] = $secondArray[1];
        }
        return $tagTree;
    }

    /**
     * Creates a directory together with all missing parent directories.
     * Stops the script when the directory cannot be created.
     *
     * @param string $mainDir Directory to create, with or without trailing "/".
     */
    function mymkdir($mainDir)
    {
        if (is_dir($mainDir)) {
            return;
        }
        // Recursive mkdir() has no depth limit; the second is_dir() covers the
        // case where the directory appeared in the meantime.
        if (!mkdir($mainDir, 0777, true) && !is_dir($mainDir)) {
            echo "Fetal Error accoured while create directory";
            die;
        }
    }

    /**
     * Finds the closest existing ancestor directory of a path.
     * The result is also stored in the global $validFatherDir, which earlier
     * versions of this class used to pass the value around.
     *
     * @param string $mainDir Path using "/" separators.
     * @return string The closest existing ancestor (the script stops when none exists).
     */
    function getFatherDir($mainDir)
    {
        global $validFatherDir;
        $candidate = $mainDir;
        while (true) {
            $pos = strrpos($candidate, "/");
            if ($pos === false) {
                echo "Fetal Error While Get Valid Father Dir";
                die;
            }
            $candidate = (string)substr($candidate, 0, $pos);
            if (is_dir($candidate)) {
                $validFatherDir = $candidate;
                return $candidate;
            }
            echo "<font color=red >$candidate is not exists<BR></font>";
        }
    }

    /**
     * Walks a directory tree and evaluates $this->callFunc for every file.
     *
     * The snippet sees $file (name), $filePath (full path), $dirName
     * (containing directory) and $this. It is run with eval(), so $callFunc
     * must only ever hold trusted code, never request data.
     *
     * @param string $dirName Directory to walk; the script exits when it is
     *                        empty, not a directory or cannot be opened.
     */
    function listDirTree( $dirName = null )
    {
        if (empty($dirName)) {
            exit( "IBFileSystem: directory is empty." );
        }
        if (!is_dir($dirName)) {
            exit( "IBFileSystem: $dirName is not a directory.");
        }
        $dh = opendir($dirName);
        if (!$dh) {
            exit( "IBFileSystem: can not open directory $dirName.");
        }
        while (($file = readdir($dh)) !== false) {
            if ($file === "." || $file === "..") {
                continue;
            }
            $filePath = $dirName . "/" . $file;
            if (is_dir($filePath)) {
                // Directory: recurse.
                $this->listDirTree($filePath);
            } else if (!empty($this->callFunc)) {
                // File: run the configured handler.
                eval($this->callFunc);
            }
        }
        closedir($dh);
    }

    /**
     * Debug helper: prints both arguments, each followed by <BR>.
     */
    function test($arga, $argb)
    {
        echo $arga."<BR>".$argb."<BR>";
    }

    /**
     * Tells whether a file name carries one of the extensions in $extname.
     *
     * @param string $file File name.
     * @return bool
     */
    function _isTextFile($file)
    {
        $ext = strtolower((string)pathinfo($file, PATHINFO_EXTENSION));
        if ($ext === "") {
            return false;
        }
        // Entries may be written with or without the leading dot (".php", "html").
        foreach (explode(";", strtolower((string)$this->extname)) as $allowed) {
            if (ltrim(trim($allowed), ".") === $ext) {
                return true;
            }
        }
        return false;
    }

    /**
     * Converts one file in place from $encode to $destEncode.
     *
     * Files with a non-text extension and files that already score as UTF-8
     * are only counted. The file is rewritten only after iconv() succeeded,
     * so a failed conversion never destroys its content.
     *
     * @param string $file     File name, used for the extension check.
     * @param string $filePath Full path of the file to convert.
     */
    function gb2utf8($file, $filePath)
    {
        if (!$this->_isTextFile($file)) {
            $this->notTextFile++;
            return;
        }
        $fstr = file_get_contents($filePath);
        if ($fstr === false) {
            echo "error while reading $filePath<BR>";
            return;
        }
        $utf8score = $this->utf8_probability($fstr);
        if (90 <= $utf8score && $utf8score <= 100) {
            $this->utf8File++;
            return;
        }
        // Pure ASCII content comes back unchanged from ASCII-compatible encodings.
        $foutstr = iconv($this->encode, $this->destEncode, $fstr);
        if ($foutstr === false) {
            echo "error while converting $filePath<BR>";
            return;
        }
        $fp = fopen($filePath, 'wb');
        if (!$fp) {
            echo "error while writing $filePath<BR>";
            return;
        }
        echo $filePath."<BR>";
        fwrite($fp, $foutstr);
        fclose($fp);
        $this->convertedFile++;
    }

    /**
     * Builds an "<li>name" list of a directory's entries.
     *
     * @param resource|string $dir An open directory handle (entries are listed
     *                             without recursion, since the handle's path is
     *                             unknown) or a directory path (sub directories
     *                             are listed recursively).
     * @return string The list markup, "" when the directory cannot be opened.
     */
    function showdir($dir)
    {
        $file_list = "";
        if (is_resource($dir)) {
            $dh     = $dir;
            $base   = "";
            $opened = false;
        } else {
            $dh = opendir($dir);
            if (!$dh) {
                return $file_list;
            }
            $base   = rtrim($dir, "/")."/";
            $opened = true;
        }
        while (($file_name = readdir($dh)) !== false) {
            if ($file_name === "." || $file_name === "..") {
                continue;
            }
            $file_list .= "<li>$file_name";
            if ($opened && is_dir($base.$file_name)) {
                $file_list .= $this->showdir($base.$file_name);
            }
        }
        if ($opened) {
            closedir($dh);
        }
        return $file_list;
    }

    /**
     * Scores how likely a byte string is UTF-8 encoded.
     *
     * Every non-ASCII byte that belongs to a well-formed 2-, 3- or 4-byte
     * sequence counts as good; the score is the percentage of good bytes
     * among all non-ASCII bytes.
     *
     * @param string $rawtextstr Raw file content (not modified).
     * @return int The score when it is above 98, or above 95 with more than
     *             30 good bytes; otherwise 0. Pure ASCII and empty input
     *             return 0.
     */
    function utf8_probability(&$rawtextstr)
    {
        $text       = (string)$rawtextstr;
        $rawtextlen = strlen($text);
        $goodbytes  = 0;
        $asciibytes = 0;

        for ($i = 0; $i < $rawtextlen; $i++) {
            $byte = ord($text[$i]);
            if ($byte < 0x80) {
                // ASCII is ignored so it cannot dilute the score.
                $asciibytes++;
                continue;
            }
            // Number of continuation bytes announced by the lead byte.
            // 0xC0/0xC1 (overlong forms) and 0xF5+ never start valid UTF-8.
            if (0xC2 <= $byte && $byte <= 0xDF) {
                $need = 1;
            } else if (0xE0 <= $byte && $byte <= 0xEF) {
                $need = 2;
            } else if (0xF0 <= $byte && $byte <= 0xF4) {
                $need = 3;
            } else {
                continue;
            }
            if ($i + $need >= $rawtextlen) {
                continue;
            }
            $wellFormed = true;
            for ($k = 1; $k <= $need; $k++) {
                $next = ord($text[$i + $k]);
                if ($next < 0x80 || $next > 0xBF) {
                    $wellFormed = false;
                    break;
                }
            }
            if ($wellFormed) {
                $goodbytes += $need + 1;
                $i += $need;
            }
        }

        // ASCII is a subset of UTF-8; nothing to judge.
        if ($asciibytes == $rawtextlen) {
            return 0;
        }
        // Multiply first so exact percentages are not lost to float rounding.
        $score = (int)((100 * $goodbytes) / ($rawtextlen - $asciibytes));
        if ($score > 98) {
            return $score;
        }
        // Allows for a few badly formed sequences in a larger text.
        if ($score > 95 && $goodbytes > 30) {
            return $score;
        }
        return 0;
    }

    /**
     * Returns (and prints) the directory of the running script with "/"
     * separators. Falls back to the directory of this source file when
     * $currentName cannot be resolved.
     *
     * @return string
     */
    function currentpath()
    {
        $resolved    = $this->currentName ? realpath($this->currentName) : false;
        $currentpath = ($resolved !== false) ? dirname($resolved) : dirname(__FILE__);
        $currentpath = str_replace("\\", "/", $currentpath);
        echo "当前执行文件所在目录:$currentpath <BR>";
        return $currentpath;
    }
}//class fileconv end
?>
<!--
  Conversion form. The path is a plain text field: a file-upload control in a
  non-multipart form only submits a bare file name in current browsers and
  cannot be edited into a directory name as the label below asks for.
-->
<FORM METHOD="POST" ACTION="?">
文件名:如果你改此文件名为目录名,则转换整个目录
<input type='text' name='fname' size='90' />
原文件格式:  <select name='encode'>
<option value='ISO-8859-1'>  .iso8859-1 .latin1</option>
<option value='ISO-8859-2'>  .iso8859-2 .latin2 .cen</option>
<option value='ISO-8859-3'>  .iso8859-3 .latin3</option>
<option value='ISO-8859-4'>  .iso8859-4 .latin4</option>
<option value='ISO-8859-5'>  .iso8859-5 .latin5 .cyr .iso-ru</option>
<option value='ISO-8859-6'>  .iso8859-6 .latin6 .arb</option>
<option value='ISO-8859-7'>  .iso8859-7 .latin7 .grk</option>
<option value='ISO-8859-8'>  .iso8859-8 .latin8 .heb</option>
<option value='ISO-8859-9'>  .iso8859-9 .latin9 .trk</option>
<option value='ISO-2022-JP'> .iso2022-jp .jis</option>
<option value='ISO-2022-KR'> .iso2022-kr .kis</option>
<option value='ISO-2022-CN'> .iso2022-cn .cis</option>
<option value='Big5' selected>        .Big5      .big5</option>
<option value='WINDOWS-1251'> .cp-1251  .win-1251</option>
<option value='CP866'>      .cp866</option>
<option value='KOI8-r'>      .koi8-r .koi8-ru</option>
<option value='KOI8-ru'>    .koi8-uk .ua</option>
<option value='ISO-10646-UCS-2'> .ucs2</option>
<option value='ISO-10646-UCS-4'> .ucs4</option>
<option value='UTF-8'>      .utf8</option>
<option value='GB2312'>      .gb2312 .gb </option>
<option value='utf-7'>      .utf7</option>
<option value='utf-8'>      .utf8</option>
<option value='big5'>        .big5 .b5</option>
<option value='EUC-TW'>      .euc-tw</option>
<option value='EUC-JP'>      .euc-jp</option>
<option value='EUC-KR'>      .euc-kr</option>
<option value='shift_jis'>  .sjis</option>
</select>
转化后的格式:<select name='destencode'>
<option value='ISO-8859-1'>  .iso8859-1 .latin1</option>
<option value='ISO-8859-2'>  .iso8859-2 .latin2 .cen</option>
<option value='ISO-8859-3'>  .iso8859-3 .latin3</option>
<option value='ISO-8859-4'>  .iso8859-4 .latin4</option>
<option value='ISO-8859-5'>  .iso8859-5 .latin5 .cyr .iso-ru</option>
<option value='ISO-8859-6'>  .iso8859-6 .latin6 .arb</option>
<option value='ISO-8859-7'>  .iso8859-7 .latin7 .grk</option>
<option value='ISO-8859-8'>  .iso8859-8 .latin8 .heb</option>
<option value='ISO-8859-9'>  .iso8859-9 .latin9 .trk</option>
<option value='ISO-2022-JP'> .iso2022-jp .jis</option>
<option value='ISO-2022-KR'> .iso2022-kr .kis</option>
<option value='ISO-2022-CN'> .iso2022-cn .cis</option>
<option value='Big5'>        .Big5      .big5</option>
<option value='WINDOWS-1251'> .cp-1251  .win-1251</option>
<option value='CP866'>      .cp866</option>
<option value='KOI8-r'>      .koi8-r .koi8-ru</option>
<option value='KOI8-ru'>    .koi8-uk .ua</option>
<option value='ISO-10646-UCS-2'> .ucs2</option>
<option value='ISO-10646-UCS-4'> .ucs4</option>
<option value='UTF-8' selected>      .utf8</option>
<option value='GB2312'>      .gb2312 .gb </option>
<option value='utf-7'>      .utf7</option>
<option value='utf-8'>      .utf8</option>
<option value='big5'>        .big5 .b5</option>
<option value='EUC-TW'>      .euc-tw</option>
<option value='EUC-JP'>      .euc-jp</option>
<option value='EUC-KR'>      .euc-kr</option>
<option value='shift_jis'>  .sjis</option>
</select>
转换为目录:<INPUT TYPE="radio" NAME="changetype" value="dir">
转换为文件:<INPUT TYPE="radio" NAME="changetype" value="file" checked>
<INPUT TYPE="submit" value="转换">
</FORM>



All times are GMT +8. The time now is 09:20 PM.

Powered by vBulletin Version 3.8.7
Copyright ©2000 - 2026, Jelsoft Enterprises Ltd.