Commit | Line | Data |
---|---|---|
382606fb | 1 | <?php\r |
172d5dd2 PHM |
2 | /********************************************************************************\r |
3 | * banana/utf8.php : utf8 to html entities\r | |
4 | * ---------------\r | |
5 | *\r | |
6 | * This file is part of the banana distribution\r | |
7 | * Copyright: See COPYING files that comes with this distribution\r | |
8 | ********************************************************************************/\r | |
382606fb | 9 | \r |
382606fb PHM |
/**
 * Convert a UTF-8 encoded byte string into ASCII text in which every
 * non-ASCII character is replaced by a decimal HTML numeric entity.
 *
 * Bytes below 128 are copied through unchanged. A well-formed 2-, 3- or
 * 4-byte UTF-8 sequence becomes "&#NNNNN;", the code point zero-padded to at
 * least five digits. A byte >= 128 that does not start a well-formed sequence
 * (stray continuation byte, lead byte whose continuation bytes are missing or
 * not in 0x80-0xBF, sequence cut off by the end of the input, or a byte in
 * 248-255) is emitted on its own as "&#NNN;" carrying the raw byte value, and
 * decoding resumes at the next byte, so following text is never swallowed.
 *
 * Overlong encodings and surrogate code points are not rejected.
 *
 * @param string $source UTF-8 encoded input
 * @return string ASCII-only string with numeric entities
 */
function utf8entities($source)
{
    $source = (string) $source;

    // value to subtract from the lead byte to strip its length-marker bits,
    // indexed by the number of bytes in the sequence
    $decrement = array(1 => 0, 2 => 192, 3 => 224, 4 => 240);

    $len = strlen($source);
    $pos = 0;
    $encodedString = '';
    while ($pos < $len)
    {
        // square-bracket offsets: the curly-brace form was removed in PHP 8.0
        $leadChar = $source[$pos];
        $leadOrd  = ord($leadChar);
        if ($leadOrd < 128)
        {
            $encodedString .= $leadChar;
            $pos++;
            continue;
        }

        // sequence length announced by the lead byte
        if (($leadOrd >= 240) && ($leadOrd <= 247))
            $seqLen = 4;
        else if (($leadOrd >= 224) && ($leadOrd <= 239))
            $seqLen = 3;
        else if (($leadOrd >= 192) && ($leadOrd <= 223))
            $seqLen = 2;
        else // stray continuation byte (128-191) or invalid lead (248-255)
            $seqLen = 1;

        // a multi-byte sequence is only accepted when it fits in the input and
        // every trailing byte is a continuation byte (binary 10xxxxxx);
        // otherwise fall back to handling the lead byte alone
        if ($pos + $seqLen > $len)
        {
            $seqLen = 1;
        }
        else
        {
            for ($k = 1; $k < $seqLen; $k++)
            {
                if ((ord($source[$pos + $k]) & 0xC0) != 0x80)
                {
                    $seqLen = 1;
                    break;
                }
            }
        }

        if ($seqLen == 1)
        {
            // not decodable as UTF-8: expose the raw byte value
            $decimalCode = $leadOrd;
        }
        else
        {
            // payload bits of the lead byte, then 6 bits per continuation byte
            $decimalCode = $leadOrd - $decrement[$seqLen];
            for ($k = 1; $k < $seqLen; $k++)
            {
                $decimalCode = ($decimalCode << 6) | (ord($source[$pos + $k]) & 0x3F);
            }
        }
        $pos += $seqLen;

        // same padding as before: 3 digits for a lone byte, 5 for a sequence
        $width = ($seqLen == 1) ? 3 : 5;
        $encodedString .= '&#' . str_pad((string) $decimalCode, $width, '0', STR_PAD_LEFT) . ';';
    }

    return $encodedString;
}
84 | \r | |
85 | ?>\r |