Convert sources to UTF-8
[banana.git] / banana / text.func.inc.php
CommitLineData
382606fb 1<?php\r
172d5dd2 2/********************************************************************************\r
7027794f 3* banana/text.php : text tools\r
172d5dd2
PHM
4* ---------------\r
5*\r
6* This file is part of the banana distribution\r
7* Copyright: See COPYING files that come with this distribution\r
8********************************************************************************/\r
382606fb 9\r
/**
 * Look up a message in the 'banana' gettext text domain.
 *
 * @param string $str Message identifier handed to dgettext().
 * @return string Whatever dgettext() returns for that identifier.
 */
function _b_($str)
{
    $domain = 'banana';

    return dgettext($domain, $str);
}
14\r
// Only define is_utf8() when nothing else has already provided it.
if (!function_exists('is_utf8')) {
    /**
     * Report whether a string survives an iconv UTF-8 -> UTF-8 round trip.
     *
     * Any diagnostic raised by iconv() is suppressed; the outcome of the
     * conversion is then loosely compared with the input.
     *
     * @param string $s Value to check.
     * @return bool True when the converted value compares equal to $s.
     */
    function is_utf8($s)
    {
        $roundTrip = @iconv('utf-8', 'utf-8', $s);

        return $s == $roundTrip;
    }
}
21\r
/**
 * Replace every non-ASCII byte sequence of a UTF-8 string with a decimal
 * numeric character reference ("&#NNNNN;"), leaving ASCII bytes untouched.
 *
 * The sequence length is chosen from the lead byte alone (>= 240: 4 bytes,
 * >= 224: 3 bytes, >= 192: 2 bytes, otherwise 1 byte). The following bytes
 * are NOT checked for being valid continuation bytes, and a sequence cut
 * short by the end of the string is decoded using the tables for the number
 * of bytes actually available, so malformed input produces a reference whose
 * number is not meaningful.
 *
 * The number is left-padded with zeros to 3 digits for single-byte sequences
 * and to 5 digits otherwise; longer numbers are emitted in full.
 *
 * @param string $source Byte string, expected to be UTF-8 encoded.
 * @return string $source with each non-ASCII sequence replaced by "&#...;".
 */
function banana_utf8entities($source)
{
    // Amount to subtract from the lead byte to strip its length marker bits,
    // indexed by the sequence length in bytes.
    $decrement = array(1 => 0, 2 => 192, 3 => 224, 4 => 240);

    // $shift[length][index]: left shift applied to the payload of the byte
    // at position `index` inside a sequence of `length` bytes.
    $shift = array(
        1 => array(0),
        2 => array(6, 0),
        3 => array(12, 6, 0),
        4 => array(18, 12, 6, 0),
    );

    $len = strlen($source);
    $encodedString = '';
    $pos = 0;

    while ($pos < $len) {
        // Square-bracket offset: the former $source{$pos} form is a parse
        // error since PHP 8.0.
        $lead = $source[$pos];
        $leadOrd = ord($lead);

        // Plain ASCII is copied through unchanged.
        if ($leadOrd < 128) {
            $encodedString .= $lead;
            $pos++;
            continue;
        }

        // Number of bytes the lead byte announces.
        if ($leadOrd >= 240) {
            $expected = 4;
        } elseif ($leadOrd >= 224) {
            $expected = 3;
        } elseif ($leadOrd >= 192) {
            $expected = 2;
        } else {
            // 128..191: a continuation byte in lead position, taken alone.
            $expected = 1;
        }

        // May be shorter than $expected when the string ends mid-sequence.
        $sequence = substr($source, $pos, $expected);
        $pos += $expected;

        // Assemble the number from the payload bits of each byte.
        $seqLen = strlen($sequence);
        $decimalCode = 0;
        for ($k = 0; $k < $seqLen; $k++) {
            // The lead byte loses its length marker, every other byte loses 128.
            $offset = ($k == 0) ? $decrement[$seqLen] : 128;
            $decimalCode += ((ord($sequence[$k]) - $offset) << $shift[$seqLen][$k]);
        }

        $width = ($seqLen == 1) ? 3 : 5;
        $encodedString .= '&#' . str_pad((string) $decimalCode, $width, '0', STR_PAD_LEFT) . ';';
    }

    return $encodedString;
}
96\r
598a1c53 97// vim:set et sw=4 sts=4 ts=4 enc=utf-8:\r
382606fb 98?>\r