c847bdd2274771a31587a5316dcad33aa42483b1
2 /********************************************************************************
3 * banana/utf8.php : utf8 to html entities
6 * This file is part of the banana distribution
7 * Copyright: See COPYING files that comes with this distribution
8 ********************************************************************************/
/**
 * Convert a UTF-8 encoded string into HTML numeric character references.
 *
 * Bytes below 128 (plain ASCII) are copied through unchanged. Any other
 * lead byte is combined with the bytes that follow it into a single code
 * point, which is emitted as a zero-padded decimal entity, e.g. the two
 * bytes of "é" become "&#00233;".
 *
 * The input is not validated: a byte in the range 128..191 found where a
 * lead byte is expected is emitted on its own as a 3-digit entity, and a
 * multi-byte sequence cut short by the end of the string is decoded using
 * only the bytes that are actually available.
 *
 * @param string $source UTF-8 encoded text
 * @return string the text with every non-ASCII sequence replaced by "&#NNNNN;"
 */
function utf8entities($source)
{
    // Value to subtract from the first byte of a sequence, indexed by the
    // sequence length, so that only the payload bits of that byte remain.
    $decrement = array(
        1 => 0,
        2 => 192,
        3 => 224,
        4 => 240,
    );

    // Number of bits to shift each byte's payload by, indexed first by the
    // sequence length and then by the byte's position inside the sequence.
    $shift = array(
        1 => array(0),
        2 => array(6, 0),
        3 => array(12, 6, 0),
        4 => array(18, 12, 6, 0),
    );

    $encodedString = '';
    $pos = 0;
    $len = strlen($source);

    while ($pos < $len)
    {
        // Square-bracket offsets: the curly-brace form no longer parses on PHP 8.
        $charPos  = $source[$pos];
        $asciiPos = ord($charPos);

        // Plain ASCII needs no entity.
        if ($asciiPos < 128)
        {
            $encodedString .= $charPos;
            $pos++;
            continue;
        }

        // The lead byte tells how many bytes make up this character.
        if (($asciiPos >= 240) && ($asciiPos <= 255))
        {
            $i = 4;
        }
        else if (($asciiPos >= 224) && ($asciiPos <= 239))
        {
            $i = 3;
        }
        else if (($asciiPos >= 192) && ($asciiPos <= 223))
        {
            $i = 2;
        }
        else
        {
            // 128..191: not a lead byte, handled as a single byte.
            $i = 1;
        }

        $thisLetter = substr($source, $pos, $i);
        $pos += $i;

        // Fold the bytes of the sequence into one decimal code point.
        // $thisLen can be smaller than $i when the input ends mid-sequence.
        $thisLen = strlen($thisLetter);
        $decimalCode = 0;
        for ($thisPos = 0; $thisPos < $thisLen; $thisPos++)
        {
            $thisCharOrd = ord($thisLetter[$thisPos]);

            // First byte: strip the length marker. Following bytes: strip
            // the 10xxxxxx continuation marker.
            $charNum = ($thisPos == 0)
                ? $thisCharOrd - $decrement[$thisLen]
                : $thisCharOrd - 128;

            $decimalCode += ($charNum << $shift[$thisLen][$thisPos]);
        }

        // Explicit string cast keeps str_pad() happy under strict typing.
        $padTo = ($thisLen == 1) ? 3 : 5;
        $encodedString .= '&#' . str_pad((string) $decimalCode, $padTo, '0', STR_PAD_LEFT) . ';';
    }

    return $encodedString;
}