c847bdd2274771a31587a5316dcad33aa42483b1
[banana.git] / banana / utf8.php
1 <?php
2 /********************************************************************************
3 * banana/utf8.php : utf8 to html entities
4 * ---------------
5 *
6 * This file is part of the banana distribution
7 * Copyright: see the COPYING file that comes with this distribution
8 ********************************************************************************/
9
/**
 * Convert a UTF-8 byte string into ASCII text with decimal HTML entities.
 *
 * Bytes below 128 are copied to the output unchanged (so characters such as
 * '&' and '<' are NOT escaped). Every other byte starts a sequence whose
 * length is chosen from the lead byte (>= 240: 4 bytes, >= 224: 3 bytes,
 * >= 192: 2 bytes, otherwise 1 byte); the sequence is decoded to a number and
 * emitted as "&#N;", zero-padded on the left to at least 5 digits (3 digits
 * when the sequence is a single byte).
 *
 * The input is not validated. A stray byte in the range 128..191 is emitted
 * as the entity of its own byte value, and a sequence cut short by the end of
 * the string is decoded from the bytes that are actually present.
 *
 * @param string $source UTF-8 encoded text
 *
 * @return string the text with every non-ASCII sequence replaced by an entity
 */
function utf8entities($source)
{
    // Amount subtracted from the lead byte to strip its length-marker bits,
    // keyed by the number of bytes actually available for the sequence.
    $leadOffset = [1 => 0, 2 => 192, 3 => 224, 4 => 240];

    $len = strlen($source);
    $encodedString = '';
    $pos = 0;

    while ($pos < $len) {
        // Square brackets on purpose: the `$string{$i}` offset syntax was
        // deprecated in PHP 7.4 and is a parse error since PHP 8.0.
        $byte = $source[$pos];
        $lead = ord($byte);

        if ($lead < 128) {
            // plain ASCII, pass through untouched
            $encodedString .= $byte;
            $pos++;
            continue;
        }

        // number of bytes the lead byte announces for this character
        if ($lead >= 240) {
            $expected = 4;
        } elseif ($lead >= 224) {
            $expected = 3;
        } elseif ($lead >= 192) {
            $expected = 2;
        } else {
            $expected = 1;
        }

        $sequence = substr($source, $pos, $expected);
        $pos += $expected;

        // May be shorter than $expected when the string ends mid-sequence;
        // everything below works from the bytes really present.
        $seqLen = strlen($sequence);

        // Each byte after the lead contributes 6 bits, so the byte at index
        // $k is shifted left by 6 * (bytes remaining after it).
        $codePoint = ($lead - $leadOffset[$seqLen]) << (6 * ($seqLen - 1));
        for ($k = 1; $k < $seqLen; $k++) {
            $codePoint += (ord($sequence[$k]) - 128) << (6 * ($seqLen - 1 - $k));
        }

        $width = ($seqLen === 1) ? 3 : 5;
        $encodedString .= '&#' . str_pad((string) $codePoint, $width, '0', STR_PAD_LEFT) . ';';
    }

    return $encodedString;
}
84
85 ?>