Array and string offset access syntax with curly braces is no longer supported in
Encoding.php on line 171
[![Array and string offset access syntax with curly braces is no longer supported in
Encoding.php on line 171][1]][1]
Error: Array and string offset access syntax with curly braces is no longer supported in
Encoding.php on line 171
namespace ForceUTF8;
/**
 * Best-effort conversion between UTF-8 and Windows-1252 / ISO-8859-1.
 *
 * All methods are static. The public entry points accept a string, an array
 * (converted element by element, recursively) or any other value (returned
 * untouched, except where noted on the individual method).
 *
 * Strings are processed byte by byte; offsets are read with the square
 * bracket syntax because the curly brace form ($text{$i}) was removed in
 * PHP 8.0.
 */
class Encoding
{
    const ICONV_TRANSLIT = "TRANSLIT";
    const ICONV_IGNORE = "IGNORE";
    const WITHOUT_ICONV = "";

    /**
     * Windows-1252 byte value (0x80-0x9F range) => UTF-8 byte sequence of the
     * character that byte represents in Windows-1252.
     */
    protected static $win1252ToUtf8 = array(
        128 => "\xe2\x82\xac",
        130 => "\xe2\x80\x9a",
        131 => "\xc6\x92",
        132 => "\xe2\x80\x9e",
        133 => "\xe2\x80\xa6",
        134 => "\xe2\x80\xa0",
        135 => "\xe2\x80\xa1",
        136 => "\xcb\x86",
        137 => "\xe2\x80\xb0",
        138 => "\xc5\xa0",
        139 => "\xe2\x80\xb9",
        140 => "\xc5\x92",
        142 => "\xc5\xbd",
        145 => "\xe2\x80\x98",
        146 => "\xe2\x80\x99",
        147 => "\xe2\x80\x9c",
        148 => "\xe2\x80\x9d",
        149 => "\xe2\x80\xa2",
        150 => "\xe2\x80\x93",
        151 => "\xe2\x80\x94",
        152 => "\xcb\x9c",
        153 => "\xe2\x84\xa2",
        154 => "\xc5\xa1",
        155 => "\xe2\x80\xba",
        156 => "\xc5\x93",
        158 => "\xc5\xbe",
        159 => "\xc5\xb8",
    );

    /**
     * UTF-8 encoding of a C1 control code point (what you get when a
     * Windows-1252 byte is converted as if it were ISO-8859-1) => UTF-8
     * sequence of the intended Windows-1252 character.
     */
    protected static $brokenUtf8ToUtf8 = array(
        "\xc2\x80" => "\xe2\x82\xac",
        "\xc2\x82" => "\xe2\x80\x9a",
        "\xc2\x83" => "\xc6\x92",
        "\xc2\x84" => "\xe2\x80\x9e",
        "\xc2\x85" => "\xe2\x80\xa6",
        "\xc2\x86" => "\xe2\x80\xa0",
        "\xc2\x87" => "\xe2\x80\xa1",
        "\xc2\x88" => "\xcb\x86",
        "\xc2\x89" => "\xe2\x80\xb0",
        "\xc2\x8a" => "\xc5\xa0",
        "\xc2\x8b" => "\xe2\x80\xb9",
        "\xc2\x8c" => "\xc5\x92",
        "\xc2\x8e" => "\xc5\xbd",
        "\xc2\x91" => "\xe2\x80\x98",
        "\xc2\x92" => "\xe2\x80\x99",
        "\xc2\x93" => "\xe2\x80\x9c",
        "\xc2\x94" => "\xe2\x80\x9d",
        "\xc2\x95" => "\xe2\x80\xa2",
        "\xc2\x96" => "\xe2\x80\x93",
        "\xc2\x97" => "\xe2\x80\x94",
        "\xc2\x98" => "\xcb\x9c",
        "\xc2\x99" => "\xe2\x84\xa2",
        "\xc2\x9a" => "\xc5\xa1",
        "\xc2\x9b" => "\xe2\x80\xba",
        "\xc2\x9c" => "\xc5\x93",
        "\xc2\x9e" => "\xc5\xbe",
        "\xc2\x9f" => "\xc5\xb8",
    );

    /**
     * UTF-8 byte sequence => single Windows-1252 byte (inverse of
     * $win1252ToUtf8).
     */
    protected static $utf8ToWin1252 = array(
        "\xe2\x82\xac" => "\x80",
        "\xe2\x80\x9a" => "\x82",
        "\xc6\x92" => "\x83",
        "\xe2\x80\x9e" => "\x84",
        "\xe2\x80\xa6" => "\x85",
        "\xe2\x80\xa0" => "\x86",
        "\xe2\x80\xa1" => "\x87",
        "\xcb\x86" => "\x88",
        "\xe2\x80\xb0" => "\x89",
        "\xc5\xa0" => "\x8a",
        "\xe2\x80\xb9" => "\x8b",
        "\xc5\x92" => "\x8c",
        "\xc5\xbd" => "\x8e",
        "\xe2\x80\x98" => "\x91",
        "\xe2\x80\x99" => "\x92",
        "\xe2\x80\x9c" => "\x93",
        "\xe2\x80\x9d" => "\x94",
        "\xe2\x80\xa2" => "\x95",
        "\xe2\x80\x93" => "\x96",
        "\xe2\x80\x94" => "\x97",
        "\xcb\x9c" => "\x98",
        "\xe2\x84\xa2" => "\x99",
        "\xc5\xa1" => "\x9a",
        "\xe2\x80\xba" => "\x9b",
        "\xc5\x93" => "\x9c",
        "\xc5\xbe" => "\x9e",
        "\xc5\xb8" => "\x9f",
    );

    /**
     * Function ForceUTF8\Encoding::toUTF8
     *
     * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * It assumes that the encoding of the original string is either Windows-1252 or ISO 8859-1.
     *
     * It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
     *
     * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
     *    are followed by any of these:  ("group B")
     *                                    ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿
     *    For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
     *    The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
     *    is also a valid unicode character, and will be left unchanged.
     *
     * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
     * 3) when any of these: ðñòó  are followed by THREE chars from group B.
     *
     * @name toUTF8
     * @param mixed $text Any string, or an array of values to convert recursively.
     *                    Non-string, non-array values are returned unchanged.
     * @return mixed The same string (or array), UTF8 encoded
     */
    public static function toUTF8($text)
    {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::toUTF8($v);
            }
            return $text;
        }

        if (!is_string($text)) {
            return $text;
        }

        $max = self::strlen($text);
        $buf = "";

        for ($i = 0; $i < $max; $i++) {
            $byte = $text[$i];
            $code = ord($byte);

            // Plain ASCII: it doesn't need conversion.
            if ($code < 0x80) {
                $buf .= $byte;
                continue;
            }

            // 0x80-0xBF outside of a UTF-8 sequence: a Windows-1252 special
            // character if the table knows it, otherwise a Latin-1 byte.
            if ($code < 0xc0) {
                $buf .= isset(self::$win1252ToUtf8[$code])
                    ? self::$win1252ToUtf8[$code]
                    : self::latin1ByteToUtf8($code);
                continue;
            }

            // 0xC0-0xFF: looks like a UTF-8 lead byte. Keep the sequence only
            // when every expected continuation byte (10xxxxxx) is present;
            // running past the end of the string counts as "not present".
            $trailing = self::utf8TrailingByteCount($code);
            $sequence = $byte;
            $isUtf8 = $trailing > 0;
            for ($k = 1; $isUtf8 && $k <= $trailing; $k++) {
                $pos = $i + $k;
                if ($pos < $max && (ord($text[$pos]) & 0xc0) === 0x80) {
                    $sequence .= $text[$pos];
                } else {
                    $isUtf8 = false;
                }
            }

            if ($isUtf8) {
                // Yeah, almost sure it's UTF8 already: copy it and skip the
                // continuation bytes that were just consumed.
                $buf .= $sequence;
                $i += $trailing;
            } else {
                // Not valid UTF8. Convert the lead byte alone; the following
                // bytes are examined on their own in later iterations.
                $buf .= self::latin1ByteToUtf8($code);
            }
        }

        return $buf;
    }

    /**
     * Converts UTF-8 text to Windows-1252.
     *
     * @param mixed  $text   String, or array of values converted recursively.
     *                       Any other value is returned unchanged.
     * @param string $option One of the ICONV_* / WITHOUT_ICONV constants.
     * @return mixed
     */
    public static function toWin1252($text, $option = self::WITHOUT_ICONV)
    {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::toWin1252($v, $option);
            }
            return $text;
        }

        if (is_string($text)) {
            return static::utf8_decode($text, $option);
        }

        return $text;
    }

    /**
     * Alias of toWin1252() with the default option.
     *
     * @param mixed $text
     * @return mixed
     */
    public static function toISO8859($text)
    {
        return self::toWin1252($text);
    }

    /**
     * Alias of toWin1252() with the default option.
     *
     * @param mixed $text
     * @return mixed
     */
    public static function toLatin1($text)
    {
        return self::toWin1252($text);
    }

    /**
     * Repeatedly decodes and re-encodes the text until the result stops
     * changing, to undo text that was UTF-8 encoded more than once.
     *
     * @param mixed  $text   String, or array of values fixed recursively.
     * @param string $option One of the ICONV_* / WITHOUT_ICONV constants.
     * @return mixed
     */
    public static function fixUTF8($text, $option = self::WITHOUT_ICONV)
    {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::fixUTF8($v, $option);
            }
            return $text;
        }

        $last = "";
        // Strict comparison: numeric-looking strings such as "1e3" and "1000"
        // must not be treated as equal.
        while ($last !== $text) {
            $last = $text;
            $text = self::toUTF8(static::utf8_decode($text, $option));
        }

        return self::toUTF8(static::utf8_decode($text, $option));
    }

    /**
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * @param string|array $text Passed straight to str_replace() as the subject.
     * @return string|array
     */
    public static function UTF8FixWin1252Chars($text)
    {
        return str_replace(array_keys(self::$brokenUtf8ToUtf8), array_values(self::$brokenUtf8ToUtf8), $text);
    }

    /**
     * Strips a leading UTF-8 byte order mark (EF BB BF), if there is one.
     *
     * @param string $str
     * @return string
     */
    public static function removeBOM($str = "")
    {
        if (substr($str, 0, 3) === "\xef\xbb\xbf") {
            $str = substr($str, 3);
        }
        return $str;
    }

    /**
     * Byte length of $text, even when mbstring function overloading replaces
     * the plain strlen().
     *
     * @param string $text
     * @return int
     */
    protected static function strlen($text)
    {
        // Bit 2 of mbstring.func_overload means the string functions are overloaded.
        $overloaded = function_exists('mb_strlen') && (((int) ini_get('mbstring.func_overload')) & 2);

        return $overloaded ? mb_strlen($text, '8bit') : strlen($text);
    }

    /**
     * Maps a loosely written encoding label to 'ISO-8859-1' or 'UTF-8'.
     *
     * The label is upper-cased and everything except ASCII letters and digits
     * is removed before the lookup. Unknown labels fall back to 'UTF-8'.
     *
     * @param string $encodingLabel e.g. "utf-8", "Latin1", "windows-1252"
     * @return string 'ISO-8859-1' or 'UTF-8'
     */
    public static function normalizeEncoding($encodingLabel)
    {
        $encoding = strtoupper($encodingLabel);
        $encoding = preg_replace('/[^a-zA-Z0-9]/', '', $encoding);

        $equivalences = array(
            'ISO88591' => 'ISO-8859-1',
            'ISO8859' => 'ISO-8859-1',
            'ISO' => 'ISO-8859-1',
            'LATIN1' => 'ISO-8859-1',
            'LATIN' => 'ISO-8859-1',
            'UTF8' => 'UTF-8',
            'UTF' => 'UTF-8',
            'WIN1252' => 'ISO-8859-1',
            'WINDOWS1252' => 'ISO-8859-1',
        );

        return isset($equivalences[$encoding]) ? $equivalences[$encoding] : 'UTF-8';
    }

    /**
     * Converts $text to the encoding named by $encodingLabel.
     *
     * @param string $encodingLabel Any label understood by normalizeEncoding().
     * @param mixed  $text
     * @return mixed Result of toLatin1() for ISO-8859-1 labels, of toUTF8() otherwise.
     */
    public static function encode($encodingLabel, $text)
    {
        if (self::normalizeEncoding($encodingLabel) === 'ISO-8859-1') {
            return self::toLatin1($text);
        }

        return self::toUTF8($text);
    }

    /**
     * Decodes UTF-8 text, either with PHP's utf8_decode() or with iconv().
     *
     * @param string $text
     * @param string $option WITHOUT_ICONV, ICONV_TRANSLIT or ICONV_IGNORE.
     * @return string|false Whatever utf8_decode() / iconv() returned.
     */
    protected static function utf8_decode($text, $option)
    {
        // Loose comparison kept on purpose so that null/false still select
        // the non-iconv path, as they always have.
        if ($option == self::WITHOUT_ICONV || !function_exists('iconv')) {
            $normalized = str_replace(
                array_keys(self::$utf8ToWin1252),
                array_values(self::$utf8ToWin1252),
                self::toUTF8($text)
            );

            // NOTE(review): utf8_decode() is deprecated as of PHP 8.2. It is
            // kept because a replacement may treat the single bytes produced
            // by the str_replace() above differently - confirm before swapping.
            return utf8_decode($normalized);
        }

        if ($option == self::ICONV_TRANSLIT) {
            $suffix = '//TRANSLIT';
        } elseif ($option == self::ICONV_IGNORE) {
            $suffix = '//IGNORE';
        } else {
            $suffix = '';
        }

        return iconv("UTF-8", "Windows-1252" . $suffix, $text);
    }

    /**
     * Number of continuation bytes a UTF-8 lead byte announces.
     *
     * @param int $lead Byte value, 0xC0-0xFF.
     * @return int 1, 2 or 3; 0 when the byte (0xF8-0xFF) cannot start a sequence.
     */
    private static function utf8TrailingByteCount($lead)
    {
        if ($lead >= 0xc0 && $lead <= 0xdf) {
            return 1;
        }
        if ($lead >= 0xe0 && $lead <= 0xef) {
            return 2;
        }
        if ($lead >= 0xf0 && $lead <= 0xf7) {
            return 3;
        }
        return 0;
    }

    /**
     * Encodes one byte in the 0x80-0xFF range as the two-byte UTF-8 sequence
     * for the code point with the same number.
     *
     * Uses an integer shift instead of chr(ord($c) / 64): passing a float with
     * a fractional part where an int is expected is deprecated since PHP 8.1.
     *
     * @param int $code Byte value, 0x80-0xFF.
     * @return string Two bytes: 110000xx 10xxxxxx.
     */
    private static function latin1ByteToUtf8($code)
    {
        return chr(0xc0 | ($code >> 6)) . chr(0x80 | ($code & 0x3f));
    }
}
?>