Add vim modeline and retab sources..
[banana.git] / banana / utf8.php
CommitLineData
382606fb 1<?php\r
172d5dd2
PHM
2/********************************************************************************\r
3* banana/utf8.php : utf8 to html entities\r
4* ---------------\r
5*\r
6* This file is part of the banana distribution\r
7* Copyright: See COPYING files that come with this distribution\r
8********************************************************************************/\r
382606fb 9\r
382606fb
PHM
/**
 * Convert a UTF-8 byte string into 7-bit ASCII text in which every non-ASCII
 * character is replaced by an HTML decimal numeric character reference.
 *
 * - Bytes below 0x80 are copied through unchanged.
 * - A well-formed 2-, 3- or 4-byte sequence becomes "&#NNNNN;", with the
 *   decimal code point left-padded with zeros to at least five digits.
 * - Any other byte >= 0x80 (a stray continuation byte, a lead byte whose
 *   continuation bytes are missing or malformed, or 0xF8-0xFF) is emitted on
 *   its own as "&#NNN;" using the raw byte value, and decoding resumes at the
 *   very next byte so that following characters are never swallowed.
 *
 * Overlong encodings and surrogate code points are not rejected; they are
 * converted to whatever number their bits decode to.
 *
 * @param string $source UTF-8 encoded input (other scalars are cast to string)
 * @return string ASCII-only text containing numeric character references
 */
function utf8entities($source)
{
    // Cast so that scalar non-string input is encoded rather than dropped.
    $source = (string) $source;
    $len = strlen($source);
    $encodedString = '';
    $pos = 0;

    while ($pos < $len)
    {
        // Square-bracket offsets: the curly-brace form was removed in PHP 8.
        $lead = ord($source[$pos]);

        // plain ASCII is copied verbatim
        if ($lead < 0x80)
        {
            $encodedString .= $source[$pos];
            $pos++;
            continue;
        }

        // Work out the sequence length announced by the lead byte and keep
        // only the payload bits of that lead byte.
        if ($lead >= 0xF0 && $lead <= 0xF7)
        {
            $seqLen = 4;
            $codePoint = $lead - 0xF0;
        }
        elseif ($lead >= 0xE0 && $lead <= 0xEF)
        {
            $seqLen = 3;
            $codePoint = $lead - 0xE0;
        }
        elseif ($lead >= 0xC0 && $lead <= 0xDF)
        {
            $seqLen = 2;
            $codePoint = $lead - 0xC0;
        }
        else
        {
            // 0x80-0xBF (continuation byte without a lead) or 0xF8-0xFF
            // (never valid in UTF-8): handled as a single raw byte.
            $seqLen = 1;
            $codePoint = $lead;
        }

        if ($seqLen > 1)
        {
            // The sequence must fit in the input and every trailing byte
            // must have the form 10xxxxxx.
            $valid = ($pos + $seqLen <= $len);
            for ($k = 1; $valid && $k < $seqLen; $k++)
            {
                $byte = ord($source[$pos + $k]);
                if (($byte & 0xC0) != 0x80)
                {
                    $valid = false;
                }
                else
                {
                    // append the six payload bits of this continuation byte
                    $codePoint = ($codePoint << 6) | ($byte & 0x3F);
                }
            }

            if (!$valid)
            {
                // Malformed or truncated: fall back to the single-byte form
                // and consume only the lead byte.
                $seqLen = 1;
                $codePoint = $lead;
            }
        }

        $encodedString .= '&#'
            . str_pad((string) $codePoint, ($seqLen == 1) ? 3 : 5, '0', STR_PAD_LEFT)
            . ';';
        $pos += $seqLen;
    }

    return $encodedString;
}
84\r
d5588318 85// vim:set et sw=4 sts=4 ts=4\r
382606fb 86?>\r