¤ thing + #294
[banana.git] / banana / utf8.php
CommitLineData
382606fb
PHM
1<?php\r
2\r
/**
 * Converts a UTF-8 encoded string into decimal HTML numeric entities
 * (&#NNNNN;) so that it renders correctly in browsers regardless of the
 * charset announced by the page's meta content-type.
 *
 * ASCII bytes (< 128) are copied through unchanged. Every well-formed
 * multi-byte sequence becomes one entity whose number is left-padded with
 * zeros to at least five digits. Any byte that does not start a well-formed
 * sequence (a stray continuation byte, a lead byte whose continuation bytes
 * are missing or invalid, or a byte in the range 0xF8-0xFF) is emitted on
 * its own as a three-digit entity holding the raw byte value, and decoding
 * resumes at the very next byte so that no following character is swallowed.
 *
 * Overlong encodings and surrogate code points are not rejected; they are
 * decoded arithmetically like any other sequence.
 *
 * @param String $source the UTF-8 encoded string
 * @return String the string with every non-ASCII character replaced by an entity
 * @access public
 * @see http://www.php.net/utf8_decode
 */
function utf8entities($source)
{
    $source = (string) $source;
    $len = strlen($source);
    $encodedString = '';
    $pos = 0;

    while ($pos < $len) {
        $lead = ord($source[$pos]);

        // Plain ASCII needs no encoding.
        if ($lead < 128) {
            $encodedString .= $source[$pos];
            $pos++;
            continue;
        }

        // Derive the expected sequence length from the lead byte and keep
        // only the payload bits of that byte.
        if ($lead >= 240 && $lead <= 247) {
            $seqLen = 4;
            $code = $lead & 0x07;
        } elseif ($lead >= 224 && $lead <= 239) {
            $seqLen = 3;
            $code = $lead & 0x0F;
        } elseif ($lead >= 192 && $lead <= 223) {
            $seqLen = 2;
            $code = $lead & 0x1F;
        } else {
            // Stray continuation byte (0x80-0xBF) or invalid lead (0xF8-0xFF).
            $seqLen = 1;
            $code = $lead;
        }

        // Fold in the continuation bytes, six payload bits each. Each one
        // must exist and match the bit pattern 10xxxxxx.
        $wellFormed = true;
        for ($k = 1; $k < $seqLen; $k++) {
            if ($pos + $k >= $len) {
                $wellFormed = false;
                break;
            }
            $next = ord($source[$pos + $k]);
            if (($next & 0xC0) !== 0x80) {
                $wellFormed = false;
                break;
            }
            $code = ($code << 6) | ($next & 0x3F);
        }

        // Malformed sequence: emit only the lead byte as a raw value and
        // re-scan from the following byte.
        if (!$wellFormed) {
            $seqLen = 1;
            $code = $lead;
        }

        $width = ($seqLen === 1) ? 3 : 5;
        $encodedString .= '&#' . str_pad((string) $code, $width, '0', STR_PAD_LEFT) . ';';
        $pos += $seqLen;
    }

    return $encodedString;
}
87\r
88?>\r