| 1 | <?php\r |
| 2 | /********************************************************************************\r |
| 3 | * banana/text.php : text tools\r |
| 4 | * ---------------\r |
| 5 | *\r |
| 6 | * This file is part of the banana distribution\r |
| 7 | * Copyright: See COPYING files that comes with this distribution\r |
| 8 | ********************************************************************************/\r |
| 9 | \r |
/*
 * _b_() : translation helper for banana's own strings.
 *
 * Which definition is installed is decided once, when this file is loaded:
 *  - without gettext's dgettext(), _b_() hands its argument back untouched;
 *  - with it, _b_() looks the message up in the 'banana' text domain.
 */
if (!function_exists('dgettext')) {
    function _b_($str)
    {
        // No gettext support available: identity fallback.
        return $str;
    }
} else {
    function _b_($str)
    {
        $translated = dgettext('banana', $str);
        return $translated;
    }
}
| 21 | \r |
/*
 * is_utf8() : tell whether $s survives a utf-8 -> utf-8 iconv round trip.
 *
 * Only defined when no function of that name exists yet.
 */
if (function_exists('is_utf8') === false) {
    function is_utf8($s)
    {
        // iconv() reports invalid input through a notice; the notice is
        // silenced here and only the conversion result is inspected.
        $roundTrip = @iconv('utf-8', 'utf-8', $s);

        return $roundTrip == $s;
    }
}
| 28 | \r |
/*
 * banana_entities() : pass strings through banana_htmlentities().
 *
 * Any value that is not a string (numbers, arrays, null, ...) is returned
 * exactly as it was received.
 */
function banana_entities($source)
{
    // Guard clause: nothing to do for non-string values.
    if (!is_string($source)) {
        return $source;
    }

    return banana_htmlentities($source);
}
| 37 | \r |
/*
 * banana_utf8entities() : replace every non-ASCII byte sequence of $source
 * by a decimal numeric character reference ("&#NNNNN;").
 *
 * Bytes below 128 are copied unchanged. For any other byte the sequence
 * length is taken from its value (>= 240: 4 bytes, >= 224: 3 bytes,
 * >= 192: 2 bytes, otherwise 1 byte), the payload bits of the sequence are
 * combined into one number, and that number is zero-padded on the left to
 * at least 3 digits for a 1-byte sequence and 5 digits otherwise.
 *
 * The input is not validated: stray continuation bytes, truncated sequences
 * or bytes that are not real continuation bytes are still converted, using
 * the same arithmetic, and yield whatever number results.
 *
 * @param string $source byte string to convert (treated as UTF-8)
 * @return string        the converted string
 */
function banana_utf8entities($source)
{
    // Value subtracted from the lead byte to strip its length-marker bits,
    // indexed by the number of bytes actually read for the sequence.
    $decrement = array(1 => 0, 2 => 192, 3 => 224, 4 => 240);

    $pos           = 0;
    $len           = strlen($source);
    $encodedString = '';

    while ($pos < $len) {
        // Square-bracket offset: the curly-brace form ($source{$pos}) was
        // deprecated in PHP 7.4 and is a parse error since PHP 8.0.
        $charPos  = $source[$pos];
        $asciiPos = ord($charPos);

        if ($asciiPos < 128) {
            // Plain ASCII is copied through unchanged.
            $encodedString .= $charPos;
            $pos++;
            continue;
        }

        // Number of bytes to consume, derived from the lead byte
        // (ord() never exceeds 255, so no upper bound is needed).
        if ($asciiPos >= 240) {
            $i = 4;
        } elseif ($asciiPos >= 224) {
            $i = 3;
        } elseif ($asciiPos >= 192) {
            $i = 2;
        } else {
            $i = 1;
        }

        // substr() may return fewer than $i bytes at the end of the string;
        // everything below works on the length actually obtained.
        $thisLetter = substr($source, $pos, $i);
        $pos       += $i;
        $thisLen    = strlen($thisLetter);

        $decimalCode = 0;
        for ($thisPos = 0; $thisPos < $thisLen; $thisPos++) {
            $thisCharOrd = ord($thisLetter[$thisPos]);

            // The lead byte loses its length marker, every following byte
            // loses the 0x80 continuation marker.
            $charNum = $thisCharOrd - (($thisPos == 0) ? $decrement[$thisLen] : 128);

            // Each byte after the lead contributes 6 bits; the last byte of
            // the sequence is not shifted at all.
            $decimalCode += ($charNum << (6 * ($thisLen - 1 - $thisPos)));
        }

        $width          = ($thisLen == 1) ? 3 : 5;
        $encodedString .= '&#' . str_pad((string) $decimalCode, $width, '0', STR_PAD_LEFT) . ';';
    }

    return $encodedString;
}
| 112 | \r |
| 113 | // vim:set et sw=4 sts=4 ts=4 enc=utf-8:\r |
| 114 | ?>\r |