<?php
// +----------------------------------------------------------------------
// | ThinkPHP [ WE CAN DO IT JUST THINK IT ]
// +----------------------------------------------------------------------
// | Copyright (c) 2009 http://thinkphp.cn All rights reserved.
// +----------------------------------------------------------------------
// | Licensed ( http://www.apache.org/licenses/LICENSE-2.0 )
// +----------------------------------------------------------------------
// | Author: liu21st <liu21st@gmail.com>
// +----------------------------------------------------------------------
namespace Org\Util;
class CodeSwitch {
    // Error messages accumulated by all calls made during this request.
    private static $error = array();
    // Informational messages accumulated by all calls made during this request.
    private static $info = array();

    // Append one message to the error log.
    private static function error($msg) {
        self::$error[] = $msg;
    }

    // Append one message to the info log.
    private static function info($info) {
        self::$info[] = $info;
    }

    /**
     * Classify raw file content by encoding.
     *
     * Byte-order marks are checked first. Content without any byte >= 0x80
     * is reported as undetermined. Otherwise the whole content is validated
     * as UTF-8; anything that is not valid UTF-8 is assumed to be GB2312,
     * the only other encoding this class converts.
     *
     * @access private
     * @param string $raw  complete file content
     * @return string  "UTF-8 WITH BOM", "UNICODE LE", "UNICODE BE", "UTF-8",
     *                 "GB2312", or "" when the content has no high bytes
     */
    private static function detectEncoding($raw) {
        if (strncmp($raw, "\xEF\xBB\xBF", 3) === 0) {
            return "UTF-8 WITH BOM";
        }
        if (strncmp($raw, "\xFF\xFE", 2) === 0) {
            return "UNICODE LE";
        }
        if (strncmp($raw, "\xFE\xFF", 2) === 0) {
            return "UNICODE BE";
        }
        // Pure ASCII (or empty) content reads the same as UTF-8 and as GB2312.
        if (!preg_match('/[\x80-\xFF]/', $raw)) {
            return "";
        }
        // With the "u" modifier preg_match() validates the whole subject as
        // UTF-8 and returns false (instead of 1) when it is malformed.
        return preg_match('//u', $raw) === 1 ? "UTF-8" : "GB2312";
    }

    /**
     * Detect the encoding of a file and convert the file in place.
     *
     * Supported conversions:
     *   GB2312 or UTF-8 WITH BOM  ->  UTF-8
     *   UTF-8 or UTF-8 WITH BOM   ->  GB2312
     *
     * The file is only rewritten after a successful conversion. Files that
     * cannot be read, carry a UTF-16 byte-order mark, contain no non-ASCII
     * bytes, or fail to convert are left untouched; the outcome is recorded
     * in the error/info logs (see getError() and getInfo()).
     *
     * @access public
     * @param string $filename     path of the file to convert
     * @param string $out_charset  target encoding, as understood by iconv()
     * @return void
     */
    public static function DetectAndSwitch($filename, $out_charset) {
        if (!is_file($filename) || !is_readable($filename)) {
            self::error("無法打開文件" . $filename);
            return;
        }
        $raw = file_get_contents($filename);
        if ($raw === false) {
            self::error("讀取文件" . $filename . "失敗");
            return;
        }

        $originEncoding = self::detectEncoding($raw);
        if ($originEncoding == "UNICODE LE" || $originEncoding == "UNICODE BE") {
            self::error("不支持從" . $originEncoding . "轉換到UTF-8或GB編碼");
            return;
        }

        $target = strtoupper($out_charset);
        if ($target == $originEncoding) {
            self::info("文件" . $filename . "轉碼檢查完成,原始文件編碼" . $originEncoding);
            return;
        }
        if ($originEncoding == "") {
            // Nothing to convert: the file has no non-ASCII bytes, so it is
            // deliberately not rewritten.
            self::info("文件" . $filename . "中沒有出現中文,但是可以斷定不是帶BOM的UTF-8編碼,沒有進行編碼轉換,不影響使用");
            return;
        }

        if ($originEncoding == "UTF-8 WITH BOM") {
            // Drop the 3-byte BOM. The cast covers PHP versions where substr()
            // returns false when the file holds nothing but the BOM.
            $raw = (string) substr($raw, 3);
        }

        $from = str_replace(" WITH BOM", "", $originEncoding);
        $content = iconv($from, $target, $raw);
        if ($content === false) {
            // Never overwrite the file with a failed conversion result.
            self::error("文件" . $filename . "從" . $from . "轉換到" . $target . "失敗,文件未被修改");
            return;
        }
        if (file_put_contents($filename, $content) === false) {
            self::error("無法寫入文件" . $filename);
            return;
        }

        self::info("對文件" . $filename . "轉碼完成,原始文件編碼" . $originEncoding . ",轉換後文件編碼" . $target);
    }

    /**
     * Recursively list a directory.
     *
     * @access public
     * @param string $path        directory to scan; a trailing "/" is appended when missing
     * @param string $mode        "FILES" returns only files, "DIRS" only directories
     *                            (each with a trailing "/"), any other value returns both
     * @param array  $file_types  lower-case file suffixes (including the dot) to accept
     * @param int    $maxdepth    how many directory levels to descend; negative means unlimited
     * @param int    $d           current recursion depth; callers normally leave this at 0
     * @return array  matching paths; the top-level call sorts them with
     *                natcasesort(), which keeps the original array keys
     */
    public static function searchdir($path, $mode = "FULL", $file_types = array(".html", ".php"), $maxdepth = -1, $d = 0) {
        if (substr($path, -1) !== '/') {
            $path .= '/';
        }

        $dirlist = array();
        if ($mode != "FILES") {
            $dirlist[] = $path;
        }

        // Unreadable directories are skipped silently (best effort).
        $handle = @opendir($path);
        if ($handle !== false) {
            while (false !== ($entry = readdir($handle))) {
                if ($entry === '.' || $entry === '..') {
                    continue;
                }
                $file = $path . $entry;
                if (!is_dir($file)) {
                    if ($mode != "DIRS") {
                        // Take the suffix from the entry name only, so a dot in
                        // a parent directory name is never mistaken for one.
                        $extpos = strrpos($entry, '.');
                        $extension = ($extpos === false) ? "" : strtolower(substr($entry, $extpos));
                        if (in_array($extension, $file_types, true)) {
                            $dirlist[] = $file;
                        }
                    }
                } elseif ($d >= 0 && ($d < $maxdepth || $maxdepth < 0)) {
                    $result = self::searchdir($file . '/', $mode, $file_types, $maxdepth, $d + 1);
                    $dirlist = array_merge($dirlist, $result);
                }
            }
            closedir($handle);
        }

        if ($d == 0) {
            natcasesort($dirlist);
        }

        return $dirlist;
    }

    /**
     * Convert every matching file below a project directory.
     *
     * @access public
     * @param string $app         project directory to scan
     * @param string $charset     target encoding passed to DetectAndSwitch()
     * @param string $mode        scan mode passed to searchdir(); normally "FILES"
     * @param array  $file_types  file suffixes to convert
     * @return void
     */
    public static function CodingSwitch($app = "./", $charset = 'UTF-8', $mode = "FILES", $file_types = array(".html", ".php")) {
        self::info("注意: 程序使用的文件編碼檢測算法可能對某些特殊字符不適用");
        $filearr = self::searchdir($app, $mode, $file_types);
        foreach ($filearr as $file) {
            // Modes other than "FILES" also return directories; only files
            // can be converted.
            if (is_dir($file)) {
                continue;
            }
            self::DetectAndSwitch($file, $charset);
        }
    }

    /**
     * Return all error messages recorded so far.
     *
     * @access public
     * @return array
     */
    public static function getError() {
        return self::$error;
    }

    /**
     * Return all informational messages recorded so far.
     *
     * @access public
     * @return array
     */
    public static function getInfo() {
        return self::$info;
    }
}