[banana.git] / banana / text.func.inc.php

<?php\r
/********************************************************************************\r
* banana/text.php : text tools\r
* ---------------\r
*\r
* This file is part of the banana distribution\r
* Copyright: See COPYING files that comes with this distribution\r
********************************************************************************/\r
\r
// Translation helper. _b_() is declared once, in one of two flavours,
// depending on whether the gettext function dgettext() is available.
if (!function_exists('dgettext')) {
    function _b_($str)
    {
        // No gettext support: hand the message back untouched.
        return $str;
    }
} else {
    function _b_($str)
    {
        // Look the message up in the 'banana' text domain.
        return dgettext('banana', $str);
    }
}
\r
// Only declare is_utf8() when no function of that name exists yet.
if (!function_exists('is_utf8')) {
    /**
     * Tell whether $s survives a UTF-8 -> UTF-8 round trip through iconv().
     *
     * Diagnostics raised by iconv() are silenced with "@"; the converted
     * value is then compared to the input with a loose (==) comparison.
     *
     * @param mixed $s value to check
     * @return bool true when the converted value loosely equals $s
     */
    function is_utf8($s)
    {
        $roundTrip = @iconv('utf-8', 'utf-8', $s);

        return $roundTrip == $s;
    }
}
\r
/**
 * Run banana_htmlentities() on string values only.
 *
 * @param mixed $source value to process
 * @return mixed the result of banana_htmlentities($source) when $source is
 *               a string; otherwise $source itself, unchanged
 */
function banana_entities($source)
{
    // Anything that is not a string is passed straight through.
    if (!is_string($source)) {
        return $source;
    }

    return banana_htmlentities($source);
}
\r
/**
 * Replace every non-ASCII byte sequence of a UTF-8 string by a decimal
 * numeric character reference ("&#NNNNN;"). Bytes below 128 are copied
 * to the result unchanged.
 *
 * The number of a multi-byte sequence is left-padded with zeros to at
 * least five digits (for example "\xc3\xa9" becomes "&#00233;"); a
 * single-byte reference is padded to at least three digits.
 *
 * The input is not validated:
 *  - a byte in the range 128..191 found where a lead byte is expected is
 *    emitted on its own, using its byte value as the number;
 *  - every lead byte >= 240 is treated as starting a 4-byte sequence;
 *  - trailing bytes are not checked to be continuation bytes;
 *  - a sequence cut short by the end of the string is decoded using the
 *    number of bytes actually available.
 *
 * @param string $source text to convert (expected to be UTF-8)
 * @return string the text with non-ASCII sequences turned into entities
 */
function banana_utf8entities($source)
{
    // Value subtracted from the first byte of a sequence to strip its
    // length marker bits, indexed by the sequence length in bytes.
    $decrement = array(1 => 0, 2 => 192, 3 => 224, 4 => 240);

    $pos = 0;
    $len = strlen($source);
    $encodedString = '';
    while ($pos < $len) {
        // Square brackets on purpose: the "{}" string offset syntax was
        // deprecated in PHP 7.4 and removed in PHP 8.0.
        $leadChar = $source[$pos];
        $leadOrd  = ord($leadChar);
        if ($leadOrd < 128) {
            $encodedString .= $leadChar;
            $pos++;
            continue;
        }

        // Number of bytes announced by the lead byte.
        if ($leadOrd >= 240) {
            $expected = 4;
        } elseif ($leadOrd >= 224) {
            $expected = 3;
        } elseif ($leadOrd >= 192) {
            $expected = 2;
        } else {
            $expected = 1;
        }
        $thisLetter = substr($source, $pos, $expected);
        $pos += $expected;

        // May be smaller than $expected when the string ends mid-sequence.
        $thisLen = strlen($thisLetter);

        // Each byte contributes its payload, shifted left by 6 bits for
        // every byte that follows it in the sequence.
        $decimalCode = 0;
        for ($thisPos = 0; $thisPos < $thisLen; $thisPos++) {
            $offset = ($thisPos == 0) ? $decrement[$thisLen] : 128;
            $shift  = 6 * ($thisLen - 1 - $thisPos);
            $decimalCode += ((ord($thisLetter[$thisPos]) - $offset) << $shift);
        }

        $padTo = ($thisLen == 1) ? 3 : 5;
        $encodedString .= '&#' . str_pad($decimalCode, $padTo, '0', STR_PAD_LEFT) . ';';
    }

    return $encodedString;
}
\r
// vim:set et sw=4 sts=4 ts=4 enc=utf-8:\r
?>\r
Commit	Line	Data
	1	<?php\r
	2	/********************************************************************************\r
	3	* banana/text.php : text tools\r
	4	* ---------------\r
	5	*\r
	6	* This file is part of the banana distribution\r
	7	* Copyright: See COPYING files that comes with this distribution\r
	8	********************************************************************************/\r
	9	\r
	10	if (function_exists('dgettext')) {\r
	11	function _b_($str)\r
	12	{\r
	13	return dgettext('banana', $str);\r
	14	}\r
	15	} else {\r
	16	function _b_($str)\r
	17	{\r
	18	return $str;\r
	19	}\r
	20	}\r
	21	\r
	22	if (!function_exists('is_utf8')) {\r
	23	function is_utf8($s)\r
	24	{\r
	25	return @iconv('utf-8', 'utf-8', $s) == $s;\r
	26	}\r
	27	}\r
	28	\r
	29	function banana_entities($source)\r
	30	{\r
	31	if (is_string($source)) {\r
	32	return banana_htmlentities($source);\r
	33	} else {\r
	34	return $source;\r
	35	}\r
	36	}\r
	37	\r
	38	function banana_utf8entities($source)\r
	39	{\r
	40	// array used to figure what number to decrement from character order value \r
	41	// according to number of characters used to map unicode to ascii by utf-8\r
	42	$decrement[4] = 240;\r
	43	$decrement[3] = 224;\r
	44	$decrement[2] = 192;\r
	45	$decrement[1] = 0;\r
	46	\r
	47	// the number of bits to shift each charNum by\r
	48	$shift[1][0] = 0;\r
	49	$shift[2][0] = 6;\r
	50	$shift[2][1] = 0;\r
	51	$shift[3][0] = 12;\r
	52	$shift[3][1] = 6;\r
	53	$shift[3][2] = 0;\r
	54	$shift[4][0] = 18;\r
	55	$shift[4][1] = 12;\r
	56	$shift[4][2] = 6;\r
	57	$shift[4][3] = 0;\r
	58	\r
	59	$pos = 0;\r
	60	$len = strlen($source);\r
	61	$encodedString = '';\r
	62	while ($pos < $len)\r
	63	{\r
	64	$charPos = $source{$pos};\r
	65	$asciiPos = ord($charPos);\r
	66	if ($asciiPos < 128)\r
	67	{\r
	68	$encodedString .= $charPos;\r
	69	$pos++;\r
	70	continue;\r
	71	}\r
	72	\r
	73	$i=1;\r
	74	if (($asciiPos >= 240) && ($asciiPos <= 255)) // 4 chars representing one unicode character\r
	75	$i=4;\r
	76	else if (($asciiPos >= 224) && ($asciiPos <= 239)) // 3 chars representing one unicode character\r
	77	$i=3;\r
	78	else if (($asciiPos >= 192) && ($asciiPos <= 223)) // 2 chars representing one unicode character\r
	79	$i=2;\r
	80	else // 1 char (lower ascii)\r
	81	$i=1;\r
	82	$thisLetter = substr($source, $pos, $i);\r
	83	$pos += $i;\r
	84	\r
	85	// process the string representing the letter to a unicode entity\r
	86	$thisLen = strlen($thisLetter);\r
	87	$thisPos = 0;\r
	88	$decimalCode = 0;\r
	89	while ($thisPos < $thisLen)\r
	90	{\r
	91	$thisCharOrd = ord(substr($thisLetter, $thisPos, 1));\r
	92	if ($thisPos == 0)\r
	93	{\r
	94	$charNum = intval($thisCharOrd - $decrement[$thisLen]);\r
	95	$decimalCode += ($charNum << $shift[$thisLen][$thisPos]);\r
	96	}\r
	97	else\r
	98	{\r
	99	$charNum = intval($thisCharOrd - 128);\r
	100	$decimalCode += ($charNum << $shift[$thisLen][$thisPos]);\r
	101	}\r
	102	\r
	103	$thisPos++;\r
	104	}\r
	105	\r
	106	$encodedLetter = '&#'. str_pad($decimalCode, ($thisLen==1)?3:5, '0', STR_PAD_LEFT).';';\r
	107	$encodedString .= $encodedLetter;\r
	108	}\r
	109	\r
	110	return $encodedString;\r
	111	}\r
	112	\r
	113	// vim:set et sw=4 sts=4 ts=4 enc=utf-8:\r
	114	?>\r