Your IP : 3.12.41.92


Current Path : /home/bitrix/ext_www/dev.shuft.com.ua/bitrix/modules/main/lib/text/
Upload File :
Current File : /home/bitrix/ext_www/dev.shuft.com.ua/bitrix/modules/main/lib/text/encoding.php

<?php
namespace Bitrix\Main\Text;

use Bitrix\Main\Loader;
use Bitrix\Main\Application;
use Bitrix\Main\Config\Configuration;
use Bitrix\Main\ErrorCollection;
use Bitrix\Main\Error;

class Encoding
{
	const PATH_TO_CONVERT_TABLES = "/bitrix/modules/main/cvtables/";

	/** @var ErrorCollection */
	protected $errors;

	/**
	 * Creates a converter instance with an empty error collection.
	 *
	 * The constructor is protected; inside this class instances are created
	 * with "new static" by convertEncoding().
	 */
	protected function __construct()
	{
		$this->errors = new ErrorCollection();
	}

	/**
	 * Converts data from a source encoding to a target encoding.
	 *
	 * Strings are converted by the first backend that yields a non-empty result,
	 * tried in this order: mbstring, iconv, bundled conversion tables.
	 * Arrays and \SplFixedArray instances are walked recursively, keys included.
	 * Any other value is returned untouched.
	 *
	 * @param string|array|\SplFixedArray $data The data to convert. From main 16.0.10 data can be an array.
	 * @param string $charsetFrom The source encoding.
	 * @param string $charsetTo The target encoding.
	 * @param string $errorMessage Reference to a variable; collected error messages are appended to it.
	 * @return string|array|\SplFixedArray|bool Returns converted data or false on error.
	 */
	public static function convertEncoding($data, $charsetFrom, $charsetTo, &$errorMessage = "")
	{
		// Same charset (compared case-insensitively): hand the data back as is.
		if(strcasecmp($charsetFrom, $charsetTo) == 0)
		{
			return $data;
		}

		$isCollection = (is_array($data) || $data instanceof \SplFixedArray);

		// Neither a collection nor a string: there is nothing to convert.
		if(!$isCollection && !is_string($data))
		{
			return $data;
		}

		if($isCollection)
		{
			// Convert every key and every value; an entry whose key changed is moved to the new key.
			foreach($data as $originalKey => $item)
			{
				$convertedKey = self::convertEncoding($originalKey, $charsetFrom, $charsetTo, $errorMessage);
				$data[$convertedKey] = self::convertEncoding($item, $charsetFrom, $charsetTo, $errorMessage);

				if($convertedKey != $originalKey)
				{
					unset($data[$originalKey]);
				}
			}
			return $data;
		}

		if($data == '')
		{
			return '';
		}

		$converter = new static;

		// An empty string from a backend means "not handled here", so the next one is tried.
		$converted = $converter->convertByMbstring($data, $charsetFrom, $charsetTo);
		if($converted === '')
		{
			$converted = $converter->convertByIconv($data, $charsetFrom, $charsetTo);
		}
		if($converted === '')
		{
			$converted = $converter->convertByTables($data, $charsetFrom, $charsetTo);
		}

		$collected = $converter->getErrors();
		if (!empty($collected))
		{
			$errorMessage .= implode("\n", $collected);
		}

		return $converted;
	}

	/**
	 * Alias of convertEncoding(), which receives all arguments unchanged.
	 *
	 * @deprecated Deprecated in main 16.0.10. Use Encoding::convertEncoding().
	 * @param string|array|\SplFixedArray $data The data to convert.
	 * @param string $charsetFrom The source encoding.
	 * @param string $charsetTo The target encoding.
	 * @param string $errorMessage Reference to a variable; error messages are appended to it.
	 * @return mixed Whatever convertEncoding() returns for the same arguments.
	 */
	public static function convertEncodingArray($data, $charsetFrom, $charsetTo, &$errorMessage = "")
	{
		return self::convertEncoding($data, $charsetFrom, $charsetTo, $errorMessage);
	}

	/**
	 * Converts a string to the encoding the application currently works in.
	 *
	 * The string is probed with detectUtf8(). A conversion happens only when the
	 * result disagrees with Application::isUtfMode(): non-UTF-8 text is converted
	 * to UTF-8 in UTF mode, UTF-8 text is converted to the current charset otherwise.
	 *
	 * @param string $string
	 * @return bool|string The converted (or untouched) string; false if the conversion failed.
	 */
	public static function convertEncodingToCurrent($string)
	{
		$stringIsUtf8 = self::detectUtf8($string);
		$siteIsUtf8 = Application::isUtfMode();

		// The only two combinations that call for a conversion.
		$toSiteCharset = (!$siteIsUtf8 && $stringIsUtf8);
		$toUtf8 = ($siteIsUtf8 && !$stringIsUtf8);

		// The culture charset is consulted only when converting away from UTF-8.
		$siteCharset = null;
		if ($toSiteCharset)
		{
			$context = Application::getInstance()->getContext();
			$culture = ($context != null ? $context->getCulture() : null);
			if ($culture != null)
			{
				$siteCharset = $culture->getCharset();
			}
		}

		// Fall back to the configured default charset, then to Windows-1251.
		if ($siteCharset == null)
		{
			$siteCharset = Configuration::getValue("default_charset");
			if ($siteCharset == null)
			{
				$siteCharset = "Windows-1251";
			}
		}

		if ($toUtf8)
		{
			$fromCp = $siteCharset;
			$toCp = "UTF-8";
		}
		elseif ($toSiteCharset)
		{
			$fromCp = "UTF-8";
			$toCp = $siteCharset;
		}
		else
		{
			$fromCp = "";
			$toCp = "";
		}

		if ($fromCp !== $toCp)
		{
			$string = self::convertEncoding($string, $fromCp, $toCp);
		}

		return $string;
	}

	/**
	 * Heuristically decides whether a string looks like UTF-8.
	 *
	 * If the string contains %XX escapes, only the bytes encoded by those escapes
	 * are examined. The bytes are then scored pairwise and the string is reported
	 * as UTF-8 when the score ends up positive.
	 *
	 * @param string $string
	 * @return bool
	 */
	public static function detectUtf8($string)
	{
		//http://mail.nl.linux.org/linux-utf8/1999-09/msg00110.html

		if(preg_match_all("/(?:%)([0-9A-F]{2})/i", $string, $match))
		{
			$hexDigits = implode('', $match[1]);
			$string = pack("H*", strtr($hexDigits, 'abcdef', 'ABCDEF'));
		}

		//valid UTF-8 octet sequences
		//0xxxxxxx
		//110xxxxx 10xxxxxx
		//1110xxxx 10xxxxxx 10xxxxxx
		//11110xxx 10xxxxxx 10xxxxxx 10xxxxxx

		$score = 0;
		$previousTopBits = 0;
		foreach(unpack("C*", $string) as $octet)
		{
			$topBits = $octet & 0xC0;
			$previousWas11 = ($previousTopBits == 0xC0);

			if ($topBits != 0x80)
			{
				//a 11xxxxxx byte that is not followed by a 10xxxxxx byte
				if ($previousWas11)
				{
					$score--;
				}
			}
			elseif ($previousWas11)
			{
				//10xxxxxx right after 11xxxxxx
				$score++;
			}
			elseif (($previousTopBits & 0x80) == 0x00)
			{
				//10xxxxxx right after a 0xxxxxxx byte (or at the very start)
				$score--;
			}

			$previousTopBits = $topBits;
		}

		return ($score > 0);
	}

	/**
	 * Converts a string with the mbstring extension.
	 *
	 * @param string $data The string to convert.
	 * @param string $charsetFrom The source encoding.
	 * @param string $charsetTo The target encoding.
	 * @return string|bool The result of mb_convert_encoding(), or an empty string when mbstring
	 * is not loaded or does not know one of the encodings (the caller then tries another backend).
	 */
	protected function convertByMbstring($data, $charsetFrom, $charsetTo)
	{
		$res = '';

		if (!extension_loaded("mbstring"))
		{
			return $res;
		}

		// For an unknown encoding mb_encoding_aliases() emits an E_WARNING and returns false
		// before PHP 8, and throws \ValueError from PHP 8 on. "@" cannot silence an exception,
		// so both cases are handled explicitly to keep the fallback chain working.
		try
		{
			$encodingsKnown = (@mb_encoding_aliases($charsetFrom) !== false && @mb_encoding_aliases($charsetTo) !== false);
		}
		catch (\ValueError $e)
		{
			$encodingsKnown = false;
		}

		if (!$encodingsKnown)
		{
			return $res;
		}

		//For UTF-16 we have to detect the byte order.
		//The default for the mbstring extension is big endian,
		//little endian has to be requested explicitly.
		if (strtoupper($charsetFrom) == "UTF-16")
		{
			$bom = substr($data, 0, 2);
			if ($bom === "\xFF\xFE")
			{
				//Little endian BOM found: cut the BOM off and name the byte order explicitly
				$res = mb_convert_encoding(substr($data, 2), $charsetTo, "UTF-16LE");
			}
			elseif ($bom === "\xFE\xFF")
			{
				//Big endian BOM found: just cut the BOM off
				$res = mb_convert_encoding(substr($data, 2), $charsetTo, $charsetFrom);
			}
			else
			{
				//Otherwise assume little endian without a BOM
				$res = mb_convert_encoding($data, $charsetTo, "UTF-16LE");
			}
		}
		else
		{
			$res = mb_convert_encoding($data, $charsetTo, $charsetFrom);
		}

		return $res;
	}

	/**
	 * Converts a string with iconv() or, when that function is missing, libiconv().
	 *
	 * Skipped entirely when the "disable_iconv" configuration value is true.
	 * A failed conversion is recorded in the error collection.
	 *
	 * @param string $data The string to convert.
	 * @param string $charsetFrom The source encoding.
	 * @param string $charsetTo The target encoding.
	 * @return string|bool The converted string; an empty string when no iconv backend was used;
	 * whatever the backend returned (false for iconv) when the conversion failed.
	 */
	protected function convertByIconv($data, $charsetFrom, $charsetTo)
	{
		$res = '';

		if (Configuration::getValue("disable_iconv") === true)
		{
			return $res;
		}

		$input = $data;
		if (strtoupper($charsetFrom) == "UTF-16")
		{
			// Prepend a little endian BOM only when the data carries no BOM of its own.
			// Prepending it unconditionally would leave the original BOM in the output as a character.
			$bom = substr($data, 0, 2);
			if ($bom !== "\xFF\xFE" && $bom !== "\xFE\xFF")
			{
				$input = "\xFF\xFE".$data;
			}
		}

		if (function_exists('iconv'))
		{
			$res = iconv($charsetFrom, $charsetTo."//IGNORE", $input);

			// Compare explicitly: a legitimate result such as "0" is falsy but is not a failure.
			if ($res === false || $res === '')
			{
				$this->errors[] = new Error("Iconv reported failure while converting string to requested character encoding.");
			}
		}
		elseif (function_exists('libiconv'))
		{
			$res = libiconv($charsetFrom, $charsetTo, $input);

			if ($res === false || $res === '')
			{
				$this->errors[] = new Error("Libiconv reported failure while converting string to requested character encoding.");
			}
		}

		return $res;
	}

	/**
	 * Loads the conversion tables named by the arguments and returns the table cache.
	 *
	 * Every argument is a file name inside PATH_TO_CONVERT_TABLES under the document root.
	 * Parsed tables are kept in a function-static variable, so a file that is already
	 * cached is not read again. Each data line of a table file is expected to hold two
	 * columns separated by whitespace or commas; the second column becomes the key and
	 * the first one the value, both upper-cased and stripped of "0x". Lines starting
	 * with "#" and lines without a second column are ignored.
	 *
	 * NOTE(review): the file name is used in the path as given; callers presumably pass
	 * lower-cased charset names only - confirm that it can never carry path separators.
	 *
	 * @param string ...$fileName One or more table file names (read via func_get_args()).
	 * @return array|bool The whole cache (file name => table), or false when a file
	 * cannot be found or opened (the reason is added to the error collection).
	 */
	protected function buildConvertTable()
	{
		static $cvTables = array();

		foreach (func_get_args() as $fileName)
		{
			if (isset($cvTables[$fileName]))
			{
				continue;
			}

			$pathToTable = Loader::getDocumentRoot().self::PATH_TO_CONVERT_TABLES.$fileName;
			if (!file_exists($pathToTable))
			{
				$this->errors[] = new Error(str_replace("#FILE#", $pathToTable, "File #FILE# is not found."));
				return false;
			}

			if (!is_file($pathToTable))
			{
				$this->errors[] = new Error(str_replace("#FILE#", $pathToTable, "File #FILE# is not a file."));
				return false;
			}

			$hFile = fopen($pathToTable, "r");
			if (!$hFile)
			{
				$this->errors[] = new Error(str_replace("#FILE#", $pathToTable, "Can not open file #FILE# for reading."));
				return false;
			}

			$table = array();

			// Looping on the fgets() result (instead of feof()) also terminates on a read error.
			while (($rawLine = fgets($hFile, 1024)) !== false)
			{
				$line = trim($rawLine);
				if ($line === '' || substr($line, 0, 1) == "#")
				{
					continue;
				}

				$hexValue = preg_split("/[\\s,]+/", $line, 3);

				// A line with a single column, or with a comment in place of the second one, defines no mapping.
				if (!isset($hexValue[1]) || substr($hexValue[1], 0, 1) == "#")
				{
					continue;
				}

				$key = strtoupper(str_replace("0x", "", $hexValue[1]));
				$value = strtoupper(str_replace("0x", "", $hexValue[0]));
				$table[$key] = $value;
			}

			fclose($hFile);

			$cvTables[$fileName] = $table;
		}

		return $cvTables;
	}

	/**
	 * Encodes a Unicode code point, given as a hexadecimal string, as a UTF-8 byte sequence.
	 *
	 * @param string $utfCharInHex The code point in hexadecimal notation, e.g. "043F".
	 * @return string One to four bytes, or an empty string for values of 0x200000 and above.
	 */
	protected function hexToUtf($utfCharInHex)
	{
		$result = "";

		$utfCharInDec = hexdec($utfCharInHex);
		if ($utfCharInDec < 128)
		{
			//0xxxxxxx
			$result .= chr($utfCharInDec);
		}
		elseif ($utfCharInDec < 2048)
		{
			//110xxxxx 10xxxxxx
			$result .= chr(($utfCharInDec >> 6) + 192)
				.chr(($utfCharInDec & 63) + 128);
		}
		elseif ($utfCharInDec < 65536)
		{
			//1110xxxx 10xxxxxx 10xxxxxx
			$result .= chr(($utfCharInDec >> 12) + 224)
				.chr((($utfCharInDec >> 6) & 63) + 128)
				.chr(($utfCharInDec & 63) + 128);
		}
		elseif ($utfCharInDec < 2097152)
		{
			//11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
			//"+" binds tighter than ">>" and "&", so every shift and mask needs its own parentheses.
			$result .= chr(($utfCharInDec >> 18) + 240)
				.chr((($utfCharInDec >> 12) & 63) + 128)
				.chr((($utfCharInDec >> 6) & 63) + 128)
				.chr(($utfCharInDec & 63) + 128);
		}

		return $result;
	}

	/**
	 * Converts a string using the bundled conversion tables (see buildConvertTable()).
	 *
	 * Four cases are distinguished by the lower-cased source charset:
	 *  - "ucs-2": two-byte units read high byte first and looked up in the target table;
	 *  - "utf-16": two-byte units read low byte first and looked up in the target table;
	 *  - any other charset except "utf-8": each byte (for "gsm0338" an 0x1B escape together
	 *    with the byte after it) is mapped to its Unicode key through the source table and
	 *    then either encoded as UTF-8 or mapped through the target table;
	 *  - "utf-8": every UTF-8 sequence that has an entry in the target table is replaced
	 *    by that entry's byte, anything else is left as it is.
	 *
	 * Characters without a mapping are reported through the error collection.
	 *
	 * @param string $sourceString
	 * @param string $charsetFrom
	 * @param string $charsetTo
	 * @return bool|string The converted string, or false when a charset is empty or a table cannot be loaded.
	 */
	protected function convertByTables($sourceString, $charsetFrom, $charsetTo)
	{
		if($charsetFrom == '')
		{
			$this->errors[] = new Error("Source charset is not set.");
			return false;
		}

		if($charsetTo == '')
		{
			$this->errors[] = new Error("Destination charset is not set.");
			return false;
		}

		$charsetFrom = strtolower($charsetFrom);
		$charsetTo = strtolower($charsetTo);
		$toUtf8 = ($charsetTo == "utf-8");

		$resultString = "";
		if($charsetFrom == "ucs-2" || $charsetFrom == "utf-16")
		{
			$convertTable = $this->buildConvertTable($charsetTo);
			if(!$convertTable)
			{
				return false;
			}
			$targetTable = $convertTable[$charsetTo];

			// The two source charsets differ only in the byte order of a unit.
			$highByteFirst = ($charsetFrom == "ucs-2");

			$len = BinaryString::getLength($sourceString);
			for($i = 0; $i < $len; $i += 2)
			{
				$firstByte = ord($sourceString[$i]);
				// A missing last byte of an odd-length input is read as 0x00 without touching an undefined offset.
				$secondByte = ($i + 1 < $len ? ord($sourceString[$i + 1]) : 0);

				// Both bytes are zero-padded: without padding 0x04 0x05 would collapse into "45" instead of "0405".
				if($highByteFirst)
				{
					$hexChar = sprintf("%02X%02X", $firstByte, $secondByte);
				}
				else
				{
					$hexChar = sprintf("%02X%02X", $secondByte, $firstByte);
				}

				// Units without a table entry are dropped silently.
				if(empty($targetTable[$hexChar]))
				{
					continue;
				}

				if($toUtf8)
				{
					$resultString .= $this->hexToUtf($targetTable[$hexChar]);
				}
				else
				{
					$resultString .= chr(hexdec($targetTable[$hexChar]));
				}
			}
		}
		elseif($charsetFrom != "utf-8")
		{
			if($toUtf8)
			{
				$convertTable = $this->buildConvertTable($charsetFrom);
			}
			else
			{
				$convertTable = $this->buildConvertTable($charsetFrom, $charsetTo);
			}

			if(!$convertTable)
			{
				return false;
			}

			// Reverse lookup (source byte(s) => Unicode key), built once instead of scanning
			// the table for every byte. The first table entry wins for a repeated value.
			$sourceToUnicode = array();
			foreach($convertTable[$charsetFrom] as $unicodeKey => $sourceHex)
			{
				if(!isset($sourceToUnicode[$sourceHex]))
				{
					$sourceToUnicode[$sourceHex] = $unicodeKey;
				}
			}

			$stringLength = BinaryString::getLength($sourceString);

			for ($i = 0; $i < $stringLength; $i++)
			{
				$hexChar = sprintf("%02X", ord($sourceString[$i]));

				// A gsm0338 escape forms one table key with the byte that follows it.
				// An escape that is the very last byte is looked up on its own.
				if(($charsetFrom == "gsm0338") && ($hexChar === '1B') && ($i + 1 < $stringLength))
				{
					$i++;
					$hexChar .= sprintf("%02X", ord($sourceString[$i]));
				}

				if(!isset($sourceToUnicode[$hexChar]))
				{
					$this->errors[] = new Error(str_replace("#CHAR#", $sourceString[$i], "Cannot find matching char \"#CHAR#\" in source encoding table."));
					continue;
				}

				// One source character may map to several code points joined with "+".
				foreach(explode("+", (string)$sourceToUnicode[$hexChar]) as $unicodeHexChar)
				{
					if($toUtf8)
					{
						$resultString .= $this->hexToUtf($unicodeHexChar);
					}
					elseif(array_key_exists($unicodeHexChar, $convertTable[$charsetTo]))
					{
						$resultString .= chr(hexdec($convertTable[$charsetTo][$unicodeHexChar]));
					}
					else
					{
						$this->errors[] = new Error(str_replace("#CHAR#", $sourceString[$i], "Cannot find matching char \"#CHAR#\" in destination encoding table."));
					}
				}
			}
		}
		else
		{
			$convertTable = $this->buildConvertTable($charsetTo);
			if(!$convertTable)
			{
				return false;
			}

			// Collect all replacements first and apply them in a single strtr() pass.
			// Replacing entry by entry would re-scan bytes that were already converted, and two
			// of them could combine into a UTF-8 sequence that a later entry replaces again.
			$replacePairs = array();
			foreach($convertTable[$charsetTo] as $unicodeKey => $hexChar)
			{
				$utfSequence = "";
				foreach(explode("+", (string)$unicodeKey) as $unicodeHexChar)
				{
					$utfChar = $this->hexToUtf($unicodeHexChar);
					if($utfChar === "")
					{
						// Not encodable: skip the whole entry.
						$utfSequence = "";
						break;
					}
					$utfSequence .= $utfChar;
				}

				if($utfSequence !== "")
				{
					$replacePairs[$utfSequence] = chr(hexdec($hexChar));
				}
			}

			$resultString = (empty($replacePairs) ? $sourceString : strtr($sourceString, $replacePairs));
		}

		return $resultString;
	}

	/**
	 * Returns the errors collected by this converter instance.
	 *
	 * @return array The content of the error collection as returned by its toArray() method.
	 */
	public function getErrors()
	{
		return $this->errors->toArray();
	}
}