Convert sources to UTF-8
[banana.git] / banana / text.func.inc.php
1 <?php
2 /********************************************************************************
3 * banana/text.php : text tools
4 * ---------------
5 *
6 * This file is part of the banana distribution
7 * Copyright: See COPYING files that comes with this distribution
8 ********************************************************************************/
9
/**
 * Look up a message in the 'banana' gettext domain.
 *
 * @param string $str message id handed to dgettext()
 * @return string whatever dgettext() returns for that message id
 */
function _b_($str)
{
    $translated = dgettext('banana', $str);

    return $translated;
}
14
// Only define is_utf8() when nothing else has already provided it.
if (!function_exists('is_utf8')) {
    /**
     * Tell whether a string survives a UTF-8 -> UTF-8 iconv round trip.
     *
     * @param string $s text to check
     * @return bool true when the converted result compares equal (loose ==)
     *              to the input, false otherwise
     */
    function is_utf8($s)
    {
        // '@' silences whatever diagnostic iconv raises for this input
        $roundTrip = @iconv('utf-8', 'utf-8', $s);

        return $s == $roundTrip;
    }
}
21
/**
 * Replace every non-ASCII character of a UTF-8 string by a decimal HTML
 * numeric entity, leaving 7-bit ASCII bytes untouched.
 *
 * Multi-byte sequences (2, 3 or 4 bytes) are decoded to their code point and
 * emitted as "&#NNNNN;", zero-padded to at least 5 digits. A byte that cannot
 * be decoded as part of such a sequence (stray continuation byte, 0xF8-0xFF,
 * or a lead byte that is not followed by enough continuation bytes) is
 * emitted on its own as "&#NNN;" using its byte value, and decoding resumes
 * at the next byte, so neighbouring characters are never swallowed.
 *
 * Overlong encodings and surrogate code points are not rejected.
 *
 * @param string $source text to convert, expected to be UTF-8
 * @return string $source with every byte >= 128 replaced by numeric entities
 */
function banana_utf8entities($source)
{
    $len = strlen($source);
    $encodedString = '';
    $pos = 0;

    while ($pos < $len) {
        // Square-bracket offsets: the {} form is a parse error since PHP 8.0.
        $lead = ord($source[$pos]);

        // Plain ASCII is copied through unchanged.
        if ($lead < 128) {
            $encodedString .= $source[$pos];
            $pos++;
            continue;
        }

        // The lead byte announces the sequence length; strip its marker bits
        // to obtain the most significant payload bits of the code point.
        if ($lead >= 240 && $lead <= 247) {
            $seqLen = 4;
            $code = $lead - 240;
        } elseif ($lead >= 224 && $lead <= 239) {
            $seqLen = 3;
            $code = $lead - 224;
        } elseif ($lead >= 192 && $lead <= 223) {
            $seqLen = 2;
            $code = $lead - 192;
        } else {
            // 128..191 (stray continuation byte) or 248..255: not a lead byte.
            $seqLen = 1;
            $code = $lead;
        }

        // Each continuation byte (128..191) contributes 6 more payload bits.
        for ($k = 1; $k < $seqLen; $k++) {
            $next = ($pos + $k < $len) ? ord($source[$pos + $k]) : -1;
            if ($next < 128 || $next > 191) {
                // Truncated or malformed sequence: encode the lead byte alone
                // and let the following bytes be processed on their own.
                $seqLen = 1;
                $code = $lead;
                break;
            }
            $code = ($code << 6) | ($next - 128);
        }

        $pos += $seqLen;

        // Single bytes are padded to 3 digits, decoded sequences to 5.
        $encodedString .= sprintf(($seqLen == 1) ? '&#%03d;' : '&#%05d;', $code);
    }

    return $encodedString;
}
96
97 // vim:set et sw=4 sts=4 ts=4 enc=utf-8:
98 ?>