Commit | Line | Data |
---|---|---|
52d7843e FB |
1 | <?php |
2 | /******************************************************************************** | |
3 | * banana/text.php : text tools | |
4 | * --------------- | |
5 | * | |
6 | * This file is part of the banana distribution | |
7 | * Copyright: See COPYING files that comes with this distribution | |
8 | ********************************************************************************/ | |
9 | ||
if (!function_exists('dgettext')) {
    /**
     * Fallback used when dgettext() is not available:
     * the message is handed back untouched.
     *
     * @param string $str message to translate
     * @return string the same message
     */
    function _b_($str)
    {
        return $str;
    }
} else {
    /**
     * Look up a message in the 'banana' gettext domain.
     *
     * @param string $str message to translate
     * @return string whatever dgettext() returns for that message
     */
    function _b_($str)
    {
        return dgettext('banana', $str);
    }
}
21 | ||
if (!function_exists('is_utf8')) {
    /**
     * Check a value by converting it from UTF-8 to UTF-8 with iconv()
     * and loosely comparing the result with the input.
     *
     * Any diagnostic raised by iconv() is silenced on purpose.
     *
     * @param string $s value to test
     * @return bool true when the round-trip result compares equal (==) to $s
     */
    function is_utf8($s)
    {
        $roundTrip = @iconv('utf-8', 'utf-8', $s);

        return $roundTrip == $s;
    }
}
28 | ||
/**
 * Run banana_htmlentities() on string input; anything that is not a
 * string is returned exactly as it was given.
 *
 * @param mixed $source value to process
 * @return mixed result of banana_htmlentities() for strings, $source otherwise
 */
function banana_entities($source)
{
    // Non-strings are passed through untouched.
    if (!is_string($source)) {
        return $source;
    }

    return banana_htmlentities($source);
}
37 | ||
/**
 * Replace every non-ASCII UTF-8 sequence in a byte string with a decimal
 * numeric character reference ("&#NNNNN;").
 *
 * Bytes below 128 are copied to the output unchanged; in particular no
 * HTML escaping of '<', '>' or '&' is performed here.
 *
 * The sequence length is taken from the lead byte only (>= 240: 4 bytes,
 * >= 224: 3 bytes, >= 192: 2 bytes, anything else: 1 byte). The input is
 * not validated: stray continuation bytes, truncated sequences and
 * out-of-range lead bytes still yield an entity, computed from whatever
 * bytes are present.
 *
 * Multi-byte code points are zero-padded to at least 5 digits, single
 * bytes to at least 3 digits.
 *
 * @param string $source byte string, expected to be UTF-8
 * @return string $source with non-ASCII sequences turned into entities
 */
function banana_utf8entities($source)
{
    // Marker bits to subtract from the lead byte, indexed by the number of
    // bytes actually read for the sequence.
    $leadOffset = array(1 => 0, 2 => 192, 3 => 224, 4 => 240);

    $len           = strlen($source);
    $pos           = 0;
    $encodedString = '';

    while ($pos < $len) {
        // Square-bracket offset: the curly-brace form ($source{$pos}) was
        // deprecated in PHP 7.4 and is a parse error from PHP 8.0 on.
        $leadChar = $source[$pos];
        $leadByte = ord($leadChar);

        if ($leadByte < 128) {
            // Plain ASCII: copy as is.
            $encodedString .= $leadChar;
            $pos++;
            continue;
        }

        // Number of bytes the lead byte announces.
        if ($leadByte >= 240) {
            $expected = 4;
        } elseif ($leadByte >= 224) {
            $expected = 3;
        } elseif ($leadByte >= 192) {
            $expected = 2;
        } else {
            $expected = 1;
        }

        $sequence = substr($source, $pos, $expected);
        $pos += $expected;

        // May be shorter than $expected when the input ends mid-sequence.
        $seqLen = strlen($sequence);

        // Each byte after the lead one contributes 6 bits, so the byte at
        // index k is shifted left by 6 * (seqLen - 1 - k).
        $decimalCode = ($leadByte - $leadOffset[$seqLen]) << (6 * ($seqLen - 1));
        for ($k = 1; $k < $seqLen; $k++) {
            $decimalCode += (ord($sequence[$k]) - 128) << (6 * ($seqLen - 1 - $k));
        }

        $width = ($seqLen == 1) ? 3 : 5;
        $encodedString .= '&#' . str_pad((string) $decimalCode, $width, '0', STR_PAD_LEFT) . ';';
    }

    return $encodedString;
}
112 | ||
a90b6fc9 | 113 | // vim:set et sw=4 sts=4 ts=4 fenc=utf-8: |
52d7843e | 114 | ?> |