6a9fb5183626850494484e1723048880ab24c440
[banana.git] / banana / utf8.php
1 <?php
2
/**
 * Converts a UTF-8 encoded string into decimal HTML numeric character
 * references (&#NNN;) so that it displays correctly in browsers regardless
 * of the charset announced by the page's meta content-type.
 *
 * ASCII bytes (< 128) are copied through unchanged; note that they are NOT
 * HTML-escaped. Every well-formed multi-byte sequence becomes one entity,
 * zero-padded to at least 5 digits. A byte >= 128 that is not part of a
 * well-formed sequence (stray continuation byte, truncated sequence, lead
 * byte followed by a non-continuation byte, or a lead byte >= 0xF8) is
 * emitted on its own as a 3-digit entity carrying the raw byte value, and
 * decoding resumes at the very next byte so no input is swallowed.
 *
 * @param String $source the UTF-8 encoded string
 * @return String the string with non-ASCII characters replaced by entities
 * @access public
 * @see http://www.php.net/utf8_decode
 */
function utf8entities($source)
{
    // Work on a real string so byte offsets below are always valid.
    $source = (string) $source;

    $pos = 0;
    $len = strlen($source);
    $encodedString = '';
    while ($pos < $len)
    {
        $leadOrd = ord($source[$pos]);

        // plain ASCII: copy through untouched
        if ($leadOrd < 128)
        {
            $encodedString .= $source[$pos];
            $pos++;
            continue;
        }

        // Work out how many bytes the lead byte announces and which of its
        // low bits carry payload. Anything else is handled as a lone byte.
        $seqLen    = 1;
        $codePoint = $leadOrd;
        if (($leadOrd >= 240) && ($leadOrd <= 247))      // 11110xxx: 4 bytes
        {
            $seqLen    = 4;
            $codePoint = $leadOrd & 0x07;
        }
        else if (($leadOrd >= 224) && ($leadOrd <= 239)) // 1110xxxx: 3 bytes
        {
            $seqLen    = 3;
            $codePoint = $leadOrd & 0x0F;
        }
        else if (($leadOrd >= 192) && ($leadOrd <= 223)) // 110xxxxx: 2 bytes
        {
            $seqLen    = 2;
            $codePoint = $leadOrd & 0x1F;
        }

        // Every following byte must exist and look like 10xxxxxx; each one
        // contributes 6 more payload bits.
        $wellFormed = ($pos + $seqLen <= $len);
        for ($k = 1; $wellFormed && ($k < $seqLen); $k++)
        {
            $contOrd = ord($source[$pos + $k]);
            if (($contOrd & 0xC0) !== 0x80)
            {
                $wellFormed = false;
            }
            else
            {
                $codePoint = ($codePoint << 6) | ($contOrd & 0x3F);
            }
        }

        // Malformed input: emit only the lead byte and resume right after
        // it, so the bytes that follow are decoded on their own.
        if (!$wellFormed)
        {
            $seqLen    = 1;
            $codePoint = $leadOrd;
        }

        $pos += $seqLen;

        // Same padding as always: 3 digits for a lone byte, 5 for a decoded
        // multi-byte character (longer numbers are left as they are).
        $encodedString .= '&#' . str_pad((string) $codePoint, ($seqLen == 1) ? 3 : 5, '0', STR_PAD_LEFT) . ';';
    }

    return $encodedString;
}
87
88 ?>