Initial revision
[old-projects.git] / ekit / com / swabunga / spell / engine / GenericTransformator.java
CommitLineData
6dd70280
JL
1package com.swabunga.spell.engine;\r\r
2\r\r
3import java.io.*;\r\r
4import java.util.*;\r\r
5\r\r
6/** A Generic implementation of a transformator takes an aspell\r\r
7 * phonetics file and constructs some sort of transformationtable using\r\r
8 * the inner class Rule.\r\r
9 *\r\r
10 * @author Robert Gustavsson (robert@lindesign.se)\r\r
11 */\r\r
12public class GenericTransformator implements Transformator{\r\r
13 \r\r
14 public static final char STARTMULTI='(';\r\r
15 public static final char ENDMULTI=')';\r\r
16\r\r
17 Object[] ruleArray=null;\r\r
18\r\r
19 public GenericTransformator(File phonetic)throws IOException{\r\r
20 buildRules(new BufferedReader(new FileReader(phonetic)));\r\r
21 }\r\r
22\r\r
23 /**\r\r
24 * Returns the phonetic code of the word.\r\r
25 */\r\r
26 public String transform(String word) { \r\r
27 if(ruleArray==null)\r\r
28 return null;\r\r
29 TransformationRule rule;\r\r
30 StringBuffer str=new StringBuffer(word.toUpperCase());\r\r
31 int strLength=str.length();\r\r
32 int startPos=0, add=1;\r\r
33\r\r
34 while(startPos<strLength){\r\r
35 //System.out.println("StartPos:"+startPos);\r\r
36 add=1;\r\r
37 for(int i=0;i<ruleArray.length;i++){\r\r
38 //System.out.println("Testing rule#:"+i);\r\r
39 rule=(TransformationRule)ruleArray[i];\r\r
40 if(rule.startsWithExp() && startPos>0)\r\r
41 continue;\r\r
42 if(startPos+rule.lengthOfMatch()>=strLength)\r\r
43 continue;\r\r
44 if(rule.isMatching(str,startPos)){\r\r
45 str.replace(startPos,startPos+rule.getTakeOut(),rule.getReplaceExp());\r\r
46 add=rule.getReplaceExp().length();\r\r
47 strLength-=rule.getTakeOut();\r\r
48 strLength+=add;\r\r
49 //System.out.println("Replacing with rule#:"+i+" add="+add);\r\r
50 break;\r\r
51 }\r\r
52 }\r\r
53 startPos+=add;\r\r
54 }\r\r
55 return str.toString();\r\r
56 }\r\r
57\r\r
58 // Used to build up the transformastion table.\r\r
59 private void buildRules(BufferedReader in)throws IOException{\r\r
60 String read=null;\r\r
61 LinkedList ruleList=new LinkedList();\r\r
62 while((read=in.readLine())!=null){\r\r
63 buildRule(realTrimmer(read),ruleList);\r\r
64 }\r\r
65 ruleArray=ruleList.toArray();\r\r
66 }\r\r
67 \r\r
68 // Here is where the real work of reading the phonetics file is done.\r\r
69 private void buildRule(String str, LinkedList ruleList){\r\r
70 if(str.length()<1)\r\r
71 return;\r\r
72 if(str.startsWith("version"))\r\r
73 return;\r\r
74 \r\r
75 TransformationRule rule=null;\r\r
76 StringBuffer matchExp=new StringBuffer();\r\r
77 StringBuffer replaceExp=new StringBuffer();\r\r
78 boolean start=false, end=false;\r\r
79 int takeOutPart=0, matchLength=0;\r\r
80 boolean match=true, inMulti=false;\r\r
81 for(int i=0;i<str.length();i++){\r\r
82 if(Character.isWhitespace(str.charAt(i))){\r\r
83 match=false;\r\r
84 }else{\r\r
85 if(match){\r\r
86 if (!isReservedChar(str.charAt(i))){\r\r
87 matchExp.append(str.charAt(i));\r\r
88 if(!inMulti){\r\r
89 takeOutPart++;\r\r
90 matchLength++;\r\r
91 }\r\r
92 if(str.charAt(i)==STARTMULTI || str.charAt(i)==ENDMULTI)\r\r
93 inMulti=!inMulti;\r\r
94 }\r\r
95 if (str.charAt(i)=='-')\r\r
96 takeOutPart--;\r\r
97 if (str.charAt(i)=='^')\r\r
98 start=true;\r\r
99 if (str.charAt(i)=='$')\r\r
100 end=true;\r\r
101 }else{\r\r
102 replaceExp.append(str.charAt(i));\r\r
103 }\r\r
104 }\r\r
105 }\r\r
106 rule=new TransformationRule(matchExp.toString(), replaceExp.toString()\r\r
107 , takeOutPart, matchLength, start, end);\r\r
108 ruleList.add(rule);\r\r
109 }\r\r
110 \r\r
111 // Chars with special meaning to aspell. Not everyone is implemented here.\r\r
112 private boolean isReservedChar(char ch){\r\r
113 if(ch=='<' || ch=='>' || ch=='^' || ch=='$' || ch=='-' || Character.isDigit(ch))\r\r
114 return true;\r\r
115 return false;\r\r
116 }\r\r
117\r\r
118 // Trims off everything we don't care about.\r\r
119 private String realTrimmer(String row){\r\r
120 int pos=row.indexOf('#');\r\r
121 if(pos!=-1){\r\r
122 row=row.substring(0,pos);\r\r
123 }\r\r
124 return row.trim();\r\r
125 }\r\r
126\r\r
127 // Inner Classes\r\r
128 /*\r\r
129 * Holds the match string and the replace string and all the rule attributes.\r\r
130 * Is responsible for indicating matches.\r\r
131 */\r\r
132 private class TransformationRule{\r\r
133\r\r
134 private String replace;\r\r
135 private char[] match;\r\r
136 // takeOut=number of chars to replace; \r\r
137 // matchLength=length of matching string counting multies as one.\r\r
138 private int takeOut, matchLength;\r\r
139 private boolean start, end;\r\r
140\r\r
141 // Construktor\r\r
142 public TransformationRule(String match, String replace, int takeout\r\r
143 , int matchLength, boolean start, boolean end){\r\r
144 this.match=match.toCharArray();\r\r
145 this.replace=replace;\r\r
146 this.takeOut=takeout;\r\r
147 this.matchLength=matchLength;\r\r
148 this.start=start;\r\r
149 this.end=end;\r\r
150 }\r\r
151\r\r
152 /*\r\r
153 * Returns true if word from pos and forward matches the match string.\r\r
154 * Precondition: wordPos+matchLength<word.length()\r\r
155 */\r\r
156 public boolean isMatching(StringBuffer word, int wordPos){\r\r
157 boolean matching=true, inMulti=false, multiMatch=false;\r\r
158 char matchCh;\r\r
159 \r\r
160 for(int matchPos=0;matchPos<match.length;matchPos++){\r\r
161 matchCh=match[matchPos];\r\r
162 if(matchCh==STARTMULTI || matchCh==ENDMULTI){\r\r
163 inMulti=!inMulti;\r\r
164 if(!inMulti)\r\r
165 matching=matching & multiMatch;\r\r
166 else\r\r
167 multiMatch=false;\r\r
168 }else{\r\r
169 if(matchCh!=word.charAt(wordPos)){\r\r
170 if(inMulti)\r\r
171 multiMatch=multiMatch | false;\r\r
172 else\r\r
173 matching=false;\r\r
174 }else{\r\r
175 if(inMulti)\r\r
176 multiMatch=multiMatch | true;\r\r
177 else\r\r
178 matching=true;\r\r
179 }\r\r
180 if(!inMulti)\r\r
181 wordPos++;\r\r
182 if(!matching)\r\r
183 break;\r\r
184 }\r\r
185 }\r\r
186 if(end && wordPos!=word.length()-1)\r\r
187 matching=false;\r\r
188 return matching;\r\r
189 }\r\r
190\r\r
191 public String getReplaceExp(){\r\r
192 return replace;\r\r
193 }\r\r
194\r\r
195 public int getTakeOut(){\r\r
196 return takeOut;\r\r
197 }\r\r
198\r\r
199 public boolean startsWithExp(){\r\r
200 return start;\r\r
201 }\r\r
202 \r\r
203 public int lengthOfMatch(){\r\r
204 return matchLength;\r\r
205 }\r\r
206 \r\r
207 // Just for debugging purposes.\r\r
208 public String toString(){\r\r
209 return "Match:"+String.valueOf(match)\r\r
210 +" Replace:"+replace\r\r
211 +" TakeOut:"+takeOut\r\r
212 +" MatchLength:"+matchLength\r\r
213 +" Start:"+start\r\r
214 +" End:"+end;\r\r
215 }\r\r
216\r\r
217 }\r\r
218}\r\r