[old-projects.git] / ekit / com / swabunga / spell / engine / GenericTransformator.java

package  com.swabunga.spell.engine;\r\r
\r\r
import java.io.*;\r\r
import java.util.*;\r\r
\r\r
/** A Generic implementation of a transformator takes an aspell\r\r
 *  phonetics file and constructs some sort of transformationtable using\r\r
 *  the inner class Rule.\r\r
 *\r\r
 * @author Robert Gustavsson (robert@lindesign.se)\r\r
 */\r\r
public class GenericTransformator implements Transformator{\r\r
    \r\r
    public static final char STARTMULTI='(';\r\r
    public static final char ENDMULTI=')';\r\r
\r\r
    Object[] ruleArray=null;\r\r
\r\r
    public GenericTransformator(File phonetic)throws IOException{\r\r
        buildRules(new BufferedReader(new FileReader(phonetic)));\r\r
    }\r\r
\r\r
    /**\r\r
    * Returns the phonetic code of the word.\r\r
    */\r\r
    public String transform(String word) {       \r\r
        if(ruleArray==null)\r\r
            return null;\r\r
        TransformationRule rule;\r\r
        StringBuffer str=new StringBuffer(word.toUpperCase());\r\r
        int strLength=str.length();\r\r
        int startPos=0, add=1;\r\r
\r\r
        while(startPos<strLength){\r\r
            //System.out.println("StartPos:"+startPos);\r\r
            add=1;\r\r
            for(int i=0;i<ruleArray.length;i++){\r\r
                //System.out.println("Testing rule#:"+i);\r\r
                rule=(TransformationRule)ruleArray[i];\r\r
                if(rule.startsWithExp() && startPos>0)\r\r
                    continue;\r\r
                if(startPos+rule.lengthOfMatch()>=strLength)\r\r
                    continue;\r\r
                if(rule.isMatching(str,startPos)){\r\r
                    str.replace(startPos,startPos+rule.getTakeOut(),rule.getReplaceExp());\r\r
                    add=rule.getReplaceExp().length();\r\r
                    strLength-=rule.getTakeOut();\r\r
                    strLength+=add;\r\r
                    //System.out.println("Replacing with rule#:"+i+" add="+add);\r\r
                    break;\r\r
                }\r\r
            }\r\r
            startPos+=add;\r\r
        }\r\r
        return str.toString();\r\r
    }\r\r
\r\r
    // Used to build up the transformastion table.\r\r
    private void buildRules(BufferedReader in)throws IOException{\r\r
        String read=null;\r\r
        LinkedList ruleList=new LinkedList();\r\r
        while((read=in.readLine())!=null){\r\r
            buildRule(realTrimmer(read),ruleList);\r\r
        }\r\r
        ruleArray=ruleList.toArray();\r\r
    }\r\r
    \r\r
    // Here is where the real work of reading the phonetics file is done.\r\r
    private void buildRule(String str, LinkedList ruleList){\r\r
        if(str.length()<1)\r\r
            return;\r\r
        if(str.startsWith("version"))\r\r
            return;\r\r
        \r\r
        TransformationRule rule=null;\r\r
        StringBuffer matchExp=new StringBuffer();\r\r
        StringBuffer replaceExp=new StringBuffer();\r\r
        boolean start=false, end=false;\r\r
        int takeOutPart=0, matchLength=0;\r\r
        boolean match=true, inMulti=false;\r\r
        for(int i=0;i<str.length();i++){\r\r
            if(Character.isWhitespace(str.charAt(i))){\r\r
                match=false;\r\r
            }else{\r\r
                if(match){\r\r
                    if (!isReservedChar(str.charAt(i))){\r\r
                        matchExp.append(str.charAt(i));\r\r
                        if(!inMulti){\r\r
                            takeOutPart++;\r\r
                            matchLength++;\r\r
                        }\r\r
                        if(str.charAt(i)==STARTMULTI || str.charAt(i)==ENDMULTI)\r\r
                            inMulti=!inMulti;\r\r
                    }\r\r
                    if (str.charAt(i)=='-')\r\r
                        takeOutPart--;\r\r
                    if (str.charAt(i)=='^')\r\r
                        start=true;\r\r
                    if (str.charAt(i)=='$')\r\r
                        end=true;\r\r
                }else{\r\r
                    replaceExp.append(str.charAt(i));\r\r
                }\r\r
            }\r\r
        }\r\r
        rule=new TransformationRule(matchExp.toString(), replaceExp.toString()\r\r
                                        , takeOutPart, matchLength, start, end);\r\r
        ruleList.add(rule);\r\r
    }\r\r
    \r\r
    // Chars with special meaning to aspell. Not everyone is implemented here.\r\r
    private boolean isReservedChar(char ch){\r\r
        if(ch=='<' || ch=='>' || ch=='^' || ch=='$' || ch=='-' || Character.isDigit(ch))\r\r
            return true;\r\r
        return false;\r\r
    }\r\r
\r\r
    // Trims off everything we don't care about.\r\r
    private String realTrimmer(String row){\r\r
        int pos=row.indexOf('#');\r\r
        if(pos!=-1){\r\r
            row=row.substring(0,pos);\r\r
        }\r\r
        return row.trim();\r\r
    }\r\r
\r\r
    // Inner Classes\r\r
    /*\r\r
    * Holds the match string and the replace string and all the rule attributes.\r\r
    * Is responsible for indicating matches.\r\r
    */\r\r
    private class TransformationRule{\r\r
\r\r
        private String replace;\r\r
        private char[] match;\r\r
        // takeOut=number of chars to replace; \r\r
        // matchLength=length of matching string counting multies as one.\r\r
        private int takeOut, matchLength;\r\r
        private boolean start, end;\r\r
\r\r
        // Construktor\r\r
        public TransformationRule(String match, String replace, int takeout\r\r
                                  , int matchLength, boolean start, boolean end){\r\r
            this.match=match.toCharArray();\r\r
            this.replace=replace;\r\r
            this.takeOut=takeout;\r\r
            this.matchLength=matchLength;\r\r
            this.start=start;\r\r
            this.end=end;\r\r
        }\r\r
\r\r
        /*\r\r
        * Returns true if word from pos and forward matches the match string.\r\r
        * Precondition: wordPos+matchLength<word.length()\r\r
        */\r\r
        public boolean isMatching(StringBuffer word, int wordPos){\r\r
            boolean matching=true, inMulti=false, multiMatch=false;\r\r
            char matchCh;\r\r
            \r\r
            for(int matchPos=0;matchPos<match.length;matchPos++){\r\r
                matchCh=match[matchPos];\r\r
                if(matchCh==STARTMULTI || matchCh==ENDMULTI){\r\r
                    inMulti=!inMulti;\r\r
                    if(!inMulti)\r\r
                        matching=matching & multiMatch;\r\r
                    else\r\r
                        multiMatch=false;\r\r
                }else{\r\r
                    if(matchCh!=word.charAt(wordPos)){\r\r
                        if(inMulti)\r\r
                            multiMatch=multiMatch | false;\r\r
                        else\r\r
                            matching=false;\r\r
                    }else{\r\r
                        if(inMulti)\r\r
                            multiMatch=multiMatch | true;\r\r
                        else\r\r
                            matching=true;\r\r
                    }\r\r
                    if(!inMulti)\r\r
                        wordPos++;\r\r
                    if(!matching)\r\r
                        break;\r\r
                }\r\r
            }\r\r
            if(end && wordPos!=word.length()-1)\r\r
                matching=false;\r\r
            return matching;\r\r
        }\r\r
\r\r
        public String getReplaceExp(){\r\r
            return  replace;\r\r
        }\r\r
\r\r
        public int getTakeOut(){\r\r
            return takeOut;\r\r
        }\r\r
\r\r
        public boolean startsWithExp(){\r\r
            return start;\r\r
        }\r\r
        \r\r
        public int lengthOfMatch(){\r\r
            return matchLength;\r\r
        }\r\r
        \r\r
        // Just for debugging purposes.\r\r
        public String toString(){\r\r
            return "Match:"+String.valueOf(match)\r\r
                   +" Replace:"+replace\r\r
                   +" TakeOut:"+takeOut\r\r
                   +" MatchLength:"+matchLength\r\r
                   +" Start:"+start\r\r
                   +" End:"+end;\r\r
        }\r\r
\r\r
    }\r\r
}\r\r
Commit	Line	Data
	1	package com.swabunga.spell.engine;\r\r
	2	\r\r
	3	import java.io.*;\r\r
	4	import java.util.*;\r\r
	5	\r\r
	6	/** A Generic implementation of a transformator takes an aspell\r\r
	7	* phonetics file and constructs some sort of transformationtable using\r\r
	8	* the inner class Rule.\r\r
	9	*\r\r
	10	* @author Robert Gustavsson (robert@lindesign.se)\r\r
	11	*/\r\r
	12	public class GenericTransformator implements Transformator{\r\r
	13	\r\r
	14	public static final char STARTMULTI='(';\r\r
	15	public static final char ENDMULTI=')';\r\r
	16	\r\r
	17	Object[] ruleArray=null;\r\r
	18	\r\r
	19	public GenericTransformator(File phonetic)throws IOException{\r\r
	20	buildRules(new BufferedReader(new FileReader(phonetic)));\r\r
	21	}\r\r
	22	\r\r
	23	/**\r\r
	24	* Returns the phonetic code of the word.\r\r
	25	*/\r\r
	26	public String transform(String word) { \r\r
	27	if(ruleArray==null)\r\r
	28	return null;\r\r
	29	TransformationRule rule;\r\r
	30	StringBuffer str=new StringBuffer(word.toUpperCase());\r\r
	31	int strLength=str.length();\r\r
	32	int startPos=0, add=1;\r\r
	33	\r\r
	34	while(startPos<strLength){\r\r
	35	//System.out.println("StartPos:"+startPos);\r\r
	36	add=1;\r\r
	37	for(int i=0;i<ruleArray.length;i++){\r\r
	38	//System.out.println("Testing rule#:"+i);\r\r
	39	rule=(TransformationRule)ruleArray[i];\r\r
	40	if(rule.startsWithExp() && startPos>0)\r\r
	41	continue;\r\r
	42	if(startPos+rule.lengthOfMatch()>=strLength)\r\r
	43	continue;\r\r
	44	if(rule.isMatching(str,startPos)){\r\r
	45	str.replace(startPos,startPos+rule.getTakeOut(),rule.getReplaceExp());\r\r
	46	add=rule.getReplaceExp().length();\r\r
	47	strLength-=rule.getTakeOut();\r\r
	48	strLength+=add;\r\r
	49	//System.out.println("Replacing with rule#:"+i+" add="+add);\r\r
	50	break;\r\r
	51	}\r\r
	52	}\r\r
	53	startPos+=add;\r\r
	54	}\r\r
	55	return str.toString();\r\r
	56	}\r\r
	57	\r\r
	58	// Used to build up the transformastion table.\r\r
	59	private void buildRules(BufferedReader in)throws IOException{\r\r
	60	String read=null;\r\r
	61	LinkedList ruleList=new LinkedList();\r\r
	62	while((read=in.readLine())!=null){\r\r
	63	buildRule(realTrimmer(read),ruleList);\r\r
	64	}\r\r
	65	ruleArray=ruleList.toArray();\r\r
	66	}\r\r
	67	\r\r
	68	// Here is where the real work of reading the phonetics file is done.\r\r
	69	private void buildRule(String str, LinkedList ruleList){\r\r
	70	if(str.length()<1)\r\r
	71	return;\r\r
	72	if(str.startsWith("version"))\r\r
	73	return;\r\r
	74	\r\r
	75	TransformationRule rule=null;\r\r
	76	StringBuffer matchExp=new StringBuffer();\r\r
	77	StringBuffer replaceExp=new StringBuffer();\r\r
	78	boolean start=false, end=false;\r\r
	79	int takeOutPart=0, matchLength=0;\r\r
	80	boolean match=true, inMulti=false;\r\r
	81	for(int i=0;i<str.length();i++){\r\r
	82	if(Character.isWhitespace(str.charAt(i))){\r\r
	83	match=false;\r\r
	84	}else{\r\r
	85	if(match){\r\r
	86	if (!isReservedChar(str.charAt(i))){\r\r
	87	matchExp.append(str.charAt(i));\r\r
	88	if(!inMulti){\r\r
	89	takeOutPart++;\r\r
	90	matchLength++;\r\r
	91	}\r\r
	92	if(str.charAt(i)==STARTMULTI \|\| str.charAt(i)==ENDMULTI)\r\r
	93	inMulti=!inMulti;\r\r
	94	}\r\r
	95	if (str.charAt(i)=='-')\r\r
	96	takeOutPart--;\r\r
	97	if (str.charAt(i)=='^')\r\r
	98	start=true;\r\r
	99	if (str.charAt(i)=='$')\r\r
	100	end=true;\r\r
	101	}else{\r\r
	102	replaceExp.append(str.charAt(i));\r\r
	103	}\r\r
	104	}\r\r
	105	}\r\r
	106	rule=new TransformationRule(matchExp.toString(), replaceExp.toString()\r\r
	107	, takeOutPart, matchLength, start, end);\r\r
	108	ruleList.add(rule);\r\r
	109	}\r\r
	110	\r\r
	111	// Chars with special meaning to aspell. Not everyone is implemented here.\r\r
	112	private boolean isReservedChar(char ch){\r\r
	113	if(ch=='<' \|\| ch=='>' \|\| ch=='^' \|\| ch=='$' \|\| ch=='-' \|\| Character.isDigit(ch))\r\r
	114	return true;\r\r
	115	return false;\r\r
	116	}\r\r
	117	\r\r
	118	// Trims off everything we don't care about.\r\r
	119	private String realTrimmer(String row){\r\r
	120	int pos=row.indexOf('#');\r\r
	121	if(pos!=-1){\r\r
	122	row=row.substring(0,pos);\r\r
	123	}\r\r
	124	return row.trim();\r\r
	125	}\r\r
	126	\r\r
	127	// Inner Classes\r\r
	128	/*\r\r
	129	* Holds the match string and the replace string and all the rule attributes.\r\r
	130	* Is responsible for indicating matches.\r\r
	131	*/\r\r
	132	private class TransformationRule{\r\r
	133	\r\r
	134	private String replace;\r\r
	135	private char[] match;\r\r
	136	// takeOut=number of chars to replace; \r\r
	137	// matchLength=length of matching string counting multies as one.\r\r
	138	private int takeOut, matchLength;\r\r
	139	private boolean start, end;\r\r
	140	\r\r
	141	// Construktor\r\r
	142	public TransformationRule(String match, String replace, int takeout\r\r
	143	, int matchLength, boolean start, boolean end){\r\r
	144	this.match=match.toCharArray();\r\r
	145	this.replace=replace;\r\r
	146	this.takeOut=takeout;\r\r
	147	this.matchLength=matchLength;\r\r
	148	this.start=start;\r\r
	149	this.end=end;\r\r
	150	}\r\r
	151	\r\r
	152	/*\r\r
	153	* Returns true if word from pos and forward matches the match string.\r\r
	154	* Precondition: wordPos+matchLength<word.length()\r\r
	155	*/\r\r
	156	public boolean isMatching(StringBuffer word, int wordPos){\r\r
	157	boolean matching=true, inMulti=false, multiMatch=false;\r\r
	158	char matchCh;\r\r
	159	\r\r
	160	for(int matchPos=0;matchPos<match.length;matchPos++){\r\r
	161	matchCh=match[matchPos];\r\r
	162	if(matchCh==STARTMULTI \|\| matchCh==ENDMULTI){\r\r
	163	inMulti=!inMulti;\r\r
	164	if(!inMulti)\r\r
	165	matching=matching & multiMatch;\r\r
	166	else\r\r
	167	multiMatch=false;\r\r
	168	}else{\r\r
	169	if(matchCh!=word.charAt(wordPos)){\r\r
	170	if(inMulti)\r\r
	171	multiMatch=multiMatch \| false;\r\r
	172	else\r\r
	173	matching=false;\r\r
	174	}else{\r\r
	175	if(inMulti)\r\r
	176	multiMatch=multiMatch \| true;\r\r
	177	else\r\r
	178	matching=true;\r\r
	179	}\r\r
	180	if(!inMulti)\r\r
	181	wordPos++;\r\r
	182	if(!matching)\r\r
	183	break;\r\r
	184	}\r\r
	185	}\r\r
	186	if(end && wordPos!=word.length()-1)\r\r
	187	matching=false;\r\r
	188	return matching;\r\r
	189	}\r\r
	190	\r\r
	191	public String getReplaceExp(){\r\r
	192	return replace;\r\r
	193	}\r\r
	194	\r\r
	195	public int getTakeOut(){\r\r
	196	return takeOut;\r\r
	197	}\r\r
	198	\r\r
	199	public boolean startsWithExp(){\r\r
	200	return start;\r\r
	201	}\r\r
	202	\r\r
	203	public int lengthOfMatch(){\r\r
	204	return matchLength;\r\r
	205	}\r\r
	206	\r\r
	207	// Just for debugging purposes.\r\r
	208	public String toString(){\r\r
	209	return "Match:"+String.valueOf(match)\r\r
	210	+" Replace:"+replace\r\r
	211	+" TakeOut:"+takeOut\r\r
	212	+" MatchLength:"+matchLength\r\r
	213	+" Start:"+start\r\r
	214	+" End:"+end;\r\r
	215	}\r\r
	216	\r\r
	217	}\r\r
	218	}\r\r