年年我发



返回   年年我发 > 网站建设 > 代码交流
用户名
密码
 
注册账号 论坛帮助 会员列表 日历事件网赚工具 标记版面已读

代码交流 ASP,PHP,JSP等网站源代码下载与交流。

回复
 
主题工具 搜索本主题 显示模式
  #1   IP: 59.60.149.106
旧 2008-03-31, 11:28 AM
topvip topvip 当前离线
超级版主
 
注册日期: 2006-01-04
帖子: 618
精华: 11
现金: 4905 元
资产: 7353 元
声望: 10 topvip 正向着好的方向发展
默认 文件批量转换器(从ANSI 到 UTF-8) --转

文件批量转换器(从ANSI 到 UTF-8)(PHP)
其中几个函数很好用,我花了一天时间
直接创建多层目录mymkdir($dirname),
批处理目录下的文件function listDirTree( $dirName = null ,$callFunction) 可自定义处理函数。
如:
PHP 代码:
代码:
// Usage example: the per-file handler is written as a string of PHP source. $backupPath is interpolated right away, while \$file, \$filePath and \$dirName are escaped so they stay as literal variable names inside the string — presumably they are resolved later, once per file, when listDirTree evaluates the string (TODO confirm against the two-argument listDirTree variant, which is not shown here).
$callFunc="backupFile(\"$backupPath\",\$file,\$filePath,\$dirName);"; listDirTree($operateDir,$callFunc);

将文本中 x=y
u=a
提取为数组的函数
function myExplode($originalStr,$firstTag,$secondTag)
将文件转换为utf-8编码的函数。function gb2utf8($file,$filePath)
判断文件是否为utf-8编码的函数 function utf8_probability(&$rawtextstr)



PHP 代码:
代码:
<?php
/**
 * Batch text-file encoding converter (for example GB2312/ANSI to UTF-8).
 *
 * The page shows a form asking for a file or directory path, the current
 * encoding and the target encoding. On submit the selected file, or every
 * file below the selected directory, is backed up and then re-encoded in
 * place with iconv().
 *
 * Written to run on PHP 5.4 through 8.x: no call-time pass-by-reference,
 * no eval(), and a real __construct().
 *
 * SECURITY NOTE(review): this script rewrites arbitrary server-side files
 * named by the request. It must never be reachable by untrusted users.
 */

/**
 * Encodings offered by both select boxes, as array(value, label) pairs.
 * Order and duplicates are kept exactly as in the original form.
 *
 * @return array
 */
function fileconv_encoding_options()
{
    return array(
        array('ISO-8859-1', '.iso8859-1 .latin1'),
        array('ISO-8859-2', '.iso8859-2 .latin2 .cen'),
        array('ISO-8859-3', '.iso8859-3 .latin3'),
        array('ISO-8859-4', '.iso8859-4 .latin4'),
        array('ISO-8859-5', '.iso8859-5 .latin5 .cyr .iso-ru'),
        array('ISO-8859-6', '.iso8859-6 .latin6 .arb'),
        array('ISO-8859-7', '.iso8859-7 .latin7 .grk'),
        array('ISO-8859-8', '.iso8859-8 .latin8 .heb'),
        array('ISO-8859-9', '.iso8859-9 .latin9 .trk'),
        array('ISO-2022-JP', '.iso2022-jp .jis'),
        array('ISO-2022-KR', '.iso2022-kr .kis'),
        array('ISO-2022-CN', '.iso2022-cn .cis'),
        array('Big5', '.Big5 .big5'),
        array('WINDOWS-1251', '.cp-1251 .win-1251'),
        array('CP866', '.cp866'),
        array('KOI8-r', '.koi8-r .koi8-ru'),
        array('KOI8-ru', '.koi8-uk .ua'),
        array('ISO-10646-UCS-2', '.ucs2'),
        array('ISO-10646-UCS-4', '.ucs4'),
        array('UTF-8', '.utf8'),
        array('GB2312', '.gb2312 .gb '),
        array('utf-7', '.utf7'),
        array('utf-8', '.utf8'),
        array('big5', '.big5 .b5'),
        array('EUC-TW', '.euc-tw'),
        array('EUC-JP', '.euc-jp'),
        array('EUC-KR', '.euc-kr'),
        array('shift_jis', '.sjis'),
    );
}

/**
 * Tell whether $value is one of the encodings offered by the form.
 *
 * @param mixed $value Submitted encoding name.
 * @return bool
 */
function fileconv_is_known_encoding($value)
{
    foreach (fileconv_encoding_options() as $option) {
        if ($option[0] === $value) {
            return true;
        }
    }
    return false;
}

/**
 * Escape a string for HTML output. ENT_SUBSTITUTE keeps paths that are not
 * valid UTF-8 (e.g. GBK file names) from turning into an empty string.
 *
 * @param string $text Raw text.
 * @return string
 */
function fileconv_h($text)
{
    return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

/**
 * Render one <select> listing every known encoding.
 *
 * @param string $name     Form field name.
 * @param string $selected Option value to pre-select (exact, case-sensitive).
 * @return string HTML markup.
 */
function fileconv_render_select($name, $selected)
{
    $html = "<select name='" . fileconv_h($name) . "'>";
    foreach (fileconv_encoding_options() as $option) {
        $mark = ($option[0] === $selected) ? ' selected' : '';
        $html .= " <option value='" . fileconv_h($option[0]) . "'" . $mark . "> "
            . fileconv_h($option[1]) . "</option>";
    }
    return $html . " </select>";
}

// ---------------------------------------------------------------------------
// Request handling: only runs when the form was actually submitted.
// ---------------------------------------------------------------------------
$fname = isset($_POST['fname']) ? (string) $_POST['fname'] : '';
// Normalise Windows separators. The doubled form covers input that was
// escaped by legacy magic quotes; the single form covers everything else.
$fname = str_replace(array("\\\\", "\\"), "/", $fname);

$encode = isset($_POST['encode']) ? (string) $_POST['encode'] : '';
$destEncode = isset($_POST['destencode']) ? (string) $_POST['destencode'] : '';
$changeType = isset($_POST['changetype']) ? (string) $_POST['changetype'] : 'file';

// Only encodings offered by the form are passed on to iconv().
if (!fileconv_is_known_encoding($encode)) {
    $encode = '';
}
if (!fileconv_is_known_encoding($destEncode)) {
    $destEncode = '';
}

if ($fname !== '') {
    // In "dir" mode the user names a file inside the directory to convert.
    if ($changeType == "dir") {
        $fname = dirname($fname);
    }

    $shownDir = is_dir($fname) ? $fname : dirname($fname);
    echo "需要转换的文件目录:" . fileconv_h($shownDir) . "<BR>";
    echo "已有的文件编码格式:" . fileconv_h($encode);
    echo "<PRE>";

    $fc = new fileconv();
    $fc->changeType = "file";
    if (is_dir($fname)) {
        $fc->changeType = "dir";
        $fc->operateDir = $fname;
        $fc->operateDirLen = strlen($fc->operateDir);
    } elseif (is_file($fname)) {
        $fc->operateFile = $fname;
        $fc->operateDirLen = strlen($fc->operateFile);
    } else {
        echo "fatal error";
        die;
    }

    // Backup target used in directory mode (file mode overrides it in mainsub()).
    $fc->backupPath = "d:/usr/www/html/bkup_test/";
    $fc->mainCallFunc = array($fc, 'gb2utf8');
    if ($encode) {
        $fc->encode = $encode;
    }
    if ($destEncode) {
        $fc->destEncode = $destEncode;
    }
    $fc->mainsub();
    echo "</PRE>";
}

/**
 * Backs up and re-encodes a single text file or a whole directory tree.
 *
 * Typical use: set changeType, operateDir/operateFile, operateDirLen,
 * backupPath and optionally encode/destEncode, then call mainsub().
 */
class fileconv
{
    /** @var string File name of this script. */
    public $currentName;
    /** @var string "dir" to process a directory, "file" to process one file. */
    public $changeType;
    /** @var string File to process when changeType is "file". */
    public $operateFile;
    /** @var string Encoding the files are currently stored in. */
    public $encode;
    /** @var string Encoding the files are converted to. */
    public $destEncode;
    /** @var string Semicolon-separated extensions treated as text files. */
    public $extname;
    /** @var string Directory to process when changeType is "dir". */
    public $operateDir;
    /** @var int Length of the source path; used to cut sub-directory names for backups. */
    public $operateDirLen;
    /** @var string Backup directory (with trailing slash); created when missing. */
    public $backupPath;
    /** @var callable Handler invoked by listDirTree() as ($file, $filePath, $dirName). */
    public $callFunc;
    /** @var callable Main per-file handler used in directory mode. */
    public $mainCallFunc;
    /** @var int Files skipped because they already look like UTF-8. */
    public $utf8File = 0;
    /** @var int Files that were re-encoded. */
    public $convertedFile = 0;
    /** @var int Files skipped because their extension is not a text extension. */
    public $notTextFile = 0;

    /**
     * Set default encodings, the text-extension list and the counters.
     */
    public function __construct()
    {
        $this->currentName = "fileconv.php";
        $this->extname = ".php;.xml;html;.htm;.css;.txt;.js;.ini;.sql;";
        $this->encode = "gb2312";
        $this->destEncode = "utf-8";
        $this->utf8File = 0;
        $this->convertedFile = 0;
        $this->notTextFile = 0;
    }

    /**
     * Entry point: back up, convert and print the summary.
     *
     * Directory mode refuses to run when the source directory name occurs in
     * the backup path, so backups are never written into the tree being
     * converted.
     *
     * @return void
     */
    public function mainsub()
    {
        if ($this->changeType == "dir") {
            if (strpos($this->backupPath, $this->operateDir) !== false) {
                echo "Fetal Error You must change your backupdir ,<BR>It can't be under the directory you want to backup";
                die;
            }
            clearstatcache();
            if (!is_dir($this->backupPath)) {
                $this->mymkdir($this->backupPath);
            }
            $this->backup();
            $this->callFunc = $this->mainCallFunc
                ? $this->mainCallFunc
                : array($this, 'gb2utf8');
            $this->listDirTree($this->operateDir);
        } elseif ($this->changeType == "file") {
            $slash = strrpos($this->operateFile, "/");
            if ($slash === false) {
                // Bare file name: it lives in the current directory.
                $file = $this->operateFile;
                $filepath = ".";
            } else {
                $file = substr($this->operateFile, $slash + 1);
                $filepath = substr($this->operateFile, 0, $slash);
            }
            $this->backupPath = $this->currentpath() . "/testbk/";
            $this->backupFile($file, $this->operateFile, $filepath);
            $this->gb2utf8($file, $this->operateFile);
        }
        $this->echoResult();
    }

    /**
     * Copy the whole source tree into backupPath once.
     *
     * A tag.ini marker inside backupPath records that the backup was done;
     * later runs print a notice instead of overwriting the backup.
     *
     * @return void
     */
    public function backup()
    {
        if ($this->readtag($this->backupPath) == 0) {
            $this->callFunc = array($this, 'backupFile');
            $this->listDirTree($this->operateDir);
            $this->setTag($this->backupPath);
        } else {
            echo "backuped already";
        }
    }

    /**
     * Copy one file into the backup directory, keeping its sub-directory.
     *
     * @param string $file     Bare file name.
     * @param string $filePath Full path of the file to copy.
     * @param string $dirName  Directory that contains the file.
     * @return void
     */
    public function backupFile($file, $filePath, $dirName)
    {
        // Part of the directory below the source root; empty for the root
        // itself and in single-file mode.
        $subdir = (string) substr($dirName . "/", $this->operateDirLen + 1);
        $this->mymkdir($this->backupPath . $subdir);
        $bkfile = $this->backupPath . $subdir . $file;
        echo "子目录:$subdir 备份文件为: $bkfile 原始文件为:$filePath<BR>";
        if (!copy($filePath, $bkfile)) {
            echo "error while backing up $filePath<BR>";
        }
    }

    /**
     * Print the three result counters.
     *
     * @return void
     */
    public function echoResult()
    {
        echo "<BR>utf8文件:" . $this->utf8File;
        echo "<BR>被转换的文件总数:" . $this->convertedFile;
        echo "<BR>非文本文件数目:" . $this->notTextFile;
    }

    /**
     * Read the "backuped" flag from tag.ini, creating the file when missing.
     *
     * @param string $backupDir Backup directory, with trailing slash.
     * @return int 0 when no backup has been recorded, otherwise the stored flag.
     */
    public function readtag($backupDir)
    {
        if (!is_dir($backupDir)) {
            $this->mymkdir($backupDir);
        }
        $tagPath = $backupDir . "tag.ini";

        if (!is_file($tagPath)) {
            // The marker is plain ASCII, so it needs no encoding conversion.
            if (file_put_contents($tagPath, "backuped=0") === false) {
                echo "error while reading or creating the tag.ini ";
            }
            return 0;
        }

        $backupTag = file_get_contents($tagPath);
        if ($backupTag === false) {
            echo "error while reading or creating the tag.ini ";
            return 0;
        }
        $tagTree = $this->myExplode($backupTag, "/\n/", "=");
        return isset($tagTree['backuped']) ? (int) trim($tagTree['backuped']) : 0;
    }

    /**
     * Record in tag.ini that the backup has been made.
     *
     * @param string $backupDir Backup directory, with trailing slash.
     * @return void
     */
    public function setTag($backupDir)
    {
        $tagPath = $backupDir . "tag.ini";
        $str = "backuped=1";
        $tagini = file_get_contents($tagPath);

        // strpos() returns 0 for a match at the very start, so compare with false.
        if ($tagini === false || strpos($tagini, "backuped=") === false) {
            $tagini = $str;
        } else {
            $tagini = preg_replace("/backuped=0/", $str, $tagini);
        }

        if (file_put_contents($tagPath, $tagini) === false) {
            echo "error while set the tag.ini ";
        }
    }

    /**
     * Split "x=y" style text into an associative array.
     *
     * @param string $originalStr Text to parse.
     * @param string $firstTag    Regular expression separating the entries.
     * @param string $secondTag   Literal separator between key and value.
     * @return array Key => value; the value is null when an entry has no
     *               separator. Empty entries are skipped, and a value keeps
     *               any further separators it contains.
     */
    public function myExplode($originalStr, $firstTag, $secondTag)
    {
        $tagTree = array();
        $entries = preg_split($firstTag, (string) $originalStr);
        if ($entries === false) {
            return $tagTree;
        }
        foreach ($entries as $entry) {
            if ($entry === '') {
                continue;
            }
            $pair = explode($secondTag, $entry, 2);
            $tagTree[$pair[0]] = isset($pair[1]) ? $pair[1] : null;
        }
        return $tagTree;
    }

    /**
     * Create a directory together with any missing parent directories.
     * Stops the script when the directory cannot be created.
     *
     * @param string $mainDir Directory to create.
     * @return void
     */
    public function mymkdir($mainDir)
    {
        if (is_dir($mainDir)) {
            return;
        }
        // Re-check is_dir() so a directory created concurrently is not an error.
        if (!mkdir($mainDir, 0777, true) && !is_dir($mainDir)) {
            echo "Fetal Error accoured while create directory";
            die;
        }
    }

    /**
     * Find the nearest existing ancestor directory of $mainDir.
     * Stops the script when no ancestor exists.
     *
     * @param string $mainDir Path using "/" separators.
     * @return string The existing ancestor directory.
     */
    public function getFatherDir($mainDir)
    {
        // Still published through this global for callers that read it.
        global $validFatherDir;

        $candidate = $mainDir;
        while (true) {
            $pos = strrpos($candidate, "/");
            if ($pos === false) {
                echo "Fetal Error While Get Valid Father Dir";
                die;
            }
            $candidate = substr($candidate, 0, $pos);
            if (is_dir($candidate)) {
                $validFatherDir = $candidate;
                return $candidate;
            }
            echo "<font color=red >$candidate is not exists<BR></font>";
        }
    }

    /**
     * Walk a directory tree and call $this->callFunc for every file.
     *
     * The handler receives ($file, $filePath, $dirName). Sub-directories are
     * visited recursively. Any failure stops the script with a message.
     *
     * @param string|null $dirName Directory to walk.
     * @return void
     */
    public function listDirTree($dirName = null)
    {
        if (empty($dirName)) {
            exit("IBFileSystem: directory is empty.");
        }
        if (!is_dir($dirName)) {
            exit("IBFileSystem: $dirName is not a directory.");
        }
        if (!is_callable($this->callFunc)) {
            exit("IBFileSystem: no valid file handler configured.");
        }
        $dh = opendir($dirName);
        if (!$dh) {
            exit("IBFileSystem: can not open directory $dirName.");
        }

        while (($file = readdir($dh)) !== false) {
            if ($file == "." || $file == "..") {
                continue;
            }
            $filePath = $dirName . "/" . $file;
            if (is_dir($filePath)) {
                $this->listDirTree($filePath);
            } else {
                call_user_func($this->callFunc, $file, $filePath, $dirName);
            }
        }
        closedir($dh);
    }

    /**
     * Debug helper: print both arguments, each followed by a line break.
     *
     * @param mixed $arga First value.
     * @param mixed $argb Second value.
     * @return void
     */
    public function test($arga, $argb)
    {
        echo $arga . "<BR>" . $argb . "<BR>";
    }

    /**
     * Tell whether a file name carries one of the extensions in $extname.
     *
     * @param string $file Bare file name.
     * @return bool
     */
    private function isTextFile($file)
    {
        $dot = strrpos($file, ".");
        if ($dot === false) {
            return false;
        }
        $extension = strtolower(substr($file, $dot + 1));
        if ($extension === '') {
            return false;
        }
        foreach (explode(";", $this->extname) as $known) {
            if (strtolower(ltrim(trim($known), ".")) === $extension) {
                return true;
            }
        }
        return false;
    }

    /**
     * Re-encode one file in place from $encode to $destEncode.
     *
     * Files without a text extension are only counted. When the target is
     * UTF-8, files that already look like UTF-8 are counted and left alone.
     * The file is rewritten only after iconv() succeeded, so a failed
     * conversion never destroys the original content.
     *
     * @param string $file     Bare file name (used for the extension check).
     * @param string $filePath Full path of the file to convert.
     * @return void
     */
    public function gb2utf8($file, $filePath)
    {
        if (!$this->isTextFile($file)) {
            $this->notTextFile++;
            return;
        }

        $fstr = file_get_contents($filePath);
        if ($fstr === false) {
            echo "error while reading $filePath<BR>";
            return;
        }

        if (strcasecmp($this->destEncode, "utf-8") === 0) {
            $utf8score = $this->utf8_probability($fstr);
            if (90 <= $utf8score && $utf8score <= 100) {
                $this->utf8File++;
                return;
            }
        }

        $foutstr = iconv($this->encode, $this->destEncode, $fstr);
        if ($foutstr === false) {
            echo "error while converting $filePath<BR>";
            return;
        }
        if (file_put_contents($filePath, $foutstr) === false) {
            echo "error while writing $filePath<BR>";
            return;
        }
        echo $filePath . "<BR>";
        $this->convertedFile++;
    }

    /**
     * Build an HTML "<li>" list of a directory's entries.
     *
     * @param string|resource $dir Directory path, or an already opened
     *                             directory handle. Sub-directories are only
     *                             followed when a path is given, because a
     *                             handle does not reveal its location.
     * @return string HTML fragment (empty when the directory cannot be read).
     */
    public function showdir($dir)
    {
        $basePath = is_string($dir) ? $dir : null;
        $handle = ($basePath !== null) ? opendir($basePath) : $dir;
        if (!$handle) {
            return '';
        }

        $file_list = '';
        while (($file_name = readdir($handle)) !== false) {
            if ($file_name == "." || $file_name == "..") {
                continue;
            }
            $file_list .= "<li>$file_name";
            if ($basePath !== null && is_dir($basePath . "/" . $file_name)) {
                $file_list .= $this->showdir($basePath . "/" . $file_name);
            }
        }
        if ($basePath !== null) {
            closedir($handle);
        }
        return $file_list;
    }

    /**
     * Estimate how likely a byte string is UTF-8 encoded.
     *
     * Counts the bytes that belong to well-formed 2-, 3- and 4-byte UTF-8
     * sequences and relates them to all non-ASCII bytes.
     *
     * @param string $rawtextstr Raw file content (taken by reference to
     *                           avoid copying; it is not modified).
     * @return int 0 for pure ASCII, empty or unlikely input; otherwise the
     *             percentage (96-100) of non-ASCII bytes that are well formed.
     */
    public function utf8_probability(&$rawtextstr)
    {
        $rawtextlen = strlen($rawtextstr);
        $goodbytes = 0;
        $asciibytes = 0;

        for ($i = 0; $i < $rawtextlen; $i++) {
            $byte = ord($rawtextstr[$i]);
            if ($byte < 0x80) {
                // ASCII is valid in almost every encoding, so it proves nothing.
                $asciibytes++;
                continue;
            }

            if (0xC0 <= $byte && $byte <= 0xDF) {
                $trail = 1;
            } elseif (0xE0 <= $byte && $byte <= 0xEF) {
                $trail = 2;
            } elseif (0xF0 <= $byte && $byte <= 0xF4) {
                $trail = 3;
            } else {
                continue;
            }
            if ($i + $trail >= $rawtextlen) {
                continue;
            }

            $wellFormed = true;
            for ($k = 1; $k <= $trail; $k++) {
                $next = ord($rawtextstr[$i + $k]);
                if ($next < 0x80 || $next > 0xBF) {
                    $wellFormed = false;
                    break;
                }
            }
            if ($wellFormed) {
                $goodbytes += $trail + 1;
                $i += $trail;
            }
        }

        // Also covers the empty string and avoids a division by zero below.
        if ($asciibytes == $rawtextlen) {
            return 0;
        }

        $score = (int) (100 * ($goodbytes / ($rawtextlen - $asciibytes)));
        if ($score > 98) {
            return $score;
        }
        // Tolerate a few malformed sequences when there is enough evidence.
        if ($score > 95 && $goodbytes > 30) {
            return $score;
        }
        return 0;
    }

    /**
     * Directory that contains this script, using "/" separators.
     * Also prints the directory.
     *
     * @return string
     */
    public function currentpath()
    {
        $currentpath = str_replace("\\", "/", dirname(__FILE__));
        echo "当前执行文件所在目录:$currentpath <BR>";
        return $currentpath;
    }
}
?>
<FORM METHOD=POST ACTION="?">
文件名:如果你改此文件名为目录名,则转换整个目录 <input type='text' name='fname' size='90' />
原文件格式: <?php echo fileconv_render_select('encode', 'Big5'); ?>
转化后的格式:<?php echo fileconv_render_select('destencode', 'UTF-8'); ?>
转换为目录:<INPUT TYPE="radio" NAME="changetype" value="dir">
转换为文件:<INPUT TYPE="radio" NAME="changetype" value="file" checked>
<INPUT TYPE="submit" value="转换">
</FORM>
回复时引用此帖

Welcome to www.0058.net
回复


当前查看此主题的会员: 1 (0 位会员和 1 位游客)
 
主题工具 搜索本主题
搜索本主题:

高级搜索
显示模式

发帖规则
不可以发表新主题
不可以回复主题
不可以上传附件
不可以编辑您的帖子

论坛启用 BB 代码
论坛禁用 表情符号
论坛启用 [IMG] 代码
论坛禁用 HTML 代码
论坛跳转


所有时间均为北京时间. 现在的时间是 02:50 PM, Free Patent & Patent Ideas


Powered by vBulletin 3.5.2 Chinese Exhibition Happy New Year
©2000 - 2009,Jelsoft Enterprises Ltd  Desiccant Molecular Sieve Weight Loss Bentonite Trade Manager