78cd27b3 |
1 | <?php\r |
2 | /********************************************************************************\r |
ab02e8a9 |
3 | * banana/text.php : text tools\r |
78cd27b3 |
4 | * ---------------\r |
5 | *\r |
6 | * This file is part of the banana distribution\r |
7 | * Copyright: See COPYING files that comes with this distribution\r |
8 | ********************************************************************************/\r |
9 | \r |
ab02e8a9 |
/**
 * Look up a message in the 'banana' gettext domain.
 *
 * @param string $str Message to hand to dgettext().
 * @return string Whatever dgettext() returns for that message.
 */
function _b_($str)
{
    $translated = dgettext('banana', $str);

    return $translated;
}
14 | \r |
if (!function_exists('is_utf8')) {
    /**
     * Report whether a value comes back unchanged from a UTF-8 to UTF-8
     * iconv() conversion. Only declared when no is_utf8() exists already.
     *
     * @param string $s Value to check.
     * @return bool True when the converted value loosely equals $s.
     */
    function is_utf8($s)
    {
        // Diagnostics raised by iconv() are silenced by the @ operator.
        $roundTrip = @iconv('utf-8', 'utf-8', $s);

        return $s == $roundTrip;
    }
}
21 | \r |
/**
 * Replace every non-ASCII character of a UTF-8 byte string with a decimal
 * HTML numeric character reference; ASCII bytes (< 128) are copied as is.
 *
 * The sequence length is taken from the lead byte: >= 240 means 4 bytes,
 * >= 224 means 3 bytes, >= 192 means 2 bytes, anything else (128..191) is
 * handled as a single byte whose value is used directly as the code.
 * Single-byte codes are zero-padded to 3 digits, multi-byte codes to 5.
 *
 * Continuation bytes are not validated, and a sequence cut short by the end
 * of the input is decoded using the number of bytes actually available.
 *
 * @param string $source UTF-8 encoded input.
 * @return string Input with non-ASCII characters written as "&#NNNNN;".
 */
function banana_utf8entities($source)
{
    // Amount subtracted from the lead byte to strip its length-marker bits,
    // keyed by the number of bytes in the sequence.
    $leadOffset = array(1 => 0, 2 => 192, 3 => 224, 4 => 240);

    $length  = strlen($source);
    $encoded = '';
    $pos     = 0;

    while ($pos < $length) {
        // Square brackets: the {} string offset syntax was removed in PHP 8.
        $byte = $source[$pos];
        $lead = ord($byte);

        if ($lead < 128) {
            $encoded .= $byte;
            $pos++;
            continue;
        }

        if ($lead >= 240) {
            $expected = 4;
        } elseif ($lead >= 224) {
            $expected = 3;
        } elseif ($lead >= 192) {
            $expected = 2;
        } else {
            $expected = 1;
        }

        $sequence = substr($source, $pos, $expected);
        $pos += $expected;

        // May be shorter than $expected when the input ends mid-sequence.
        $count = strlen($sequence);

        // Each byte contributes 6 bits; the lead byte holds the highest ones.
        $codePoint = (ord($sequence[0]) - $leadOffset[$count]) << (6 * ($count - 1));
        for ($k = 1; $k < $count; $k++) {
            $codePoint += (ord($sequence[$k]) - 128) << (6 * ($count - 1 - $k));
        }

        $width    = ($count == 1) ? 3 : 5;
        $encoded .= '&#' . str_pad((string) $codePoint, $width, '0', STR_PAD_LEFT) . ';';
    }

    return $encoded;
}
96 | \r |
d8d416c4 |
97 | // vim:set et sw=4 sts=4 ts=4 enc=utf-8:\r |
78cd27b3 |
98 | ?>\r |