Commit | Line | Data |
---|---|---|
6dd70280 JL |
1 | package com.swabunga.spell.engine;\r\r |
2 | \r\r | |
3 | import java.io.*;\r\r | |
4 | import java.util.*;\r\r | |
5 | \r\r | |
6 | /** A Generic implementation of a transformator takes an aspell\r\r | |
7 | * phonetics file and constructs some sort of transformationtable using\r\r | |
8 | * the inner class Rule.\r\r | |
9 | *\r\r | |
10 | * @author Robert Gustavsson (robert@lindesign.se)\r\r | |
11 | */\r\r | |
12 | public class GenericTransformator implements Transformator{\r\r | |
13 | \r\r | |
14 | public static final char STARTMULTI='(';\r\r | |
15 | public static final char ENDMULTI=')';\r\r | |
16 | \r\r | |
17 | Object[] ruleArray=null;\r\r | |
18 | \r\r | |
19 | public GenericTransformator(File phonetic)throws IOException{\r\r | |
20 | buildRules(new BufferedReader(new FileReader(phonetic)));\r\r | |
21 | }\r\r | |
22 | \r\r | |
23 | /**\r\r | |
24 | * Returns the phonetic code of the word.\r\r | |
25 | */\r\r | |
26 | public String transform(String word) { \r\r | |
27 | if(ruleArray==null)\r\r | |
28 | return null;\r\r | |
29 | TransformationRule rule;\r\r | |
30 | StringBuffer str=new StringBuffer(word.toUpperCase());\r\r | |
31 | int strLength=str.length();\r\r | |
32 | int startPos=0, add=1;\r\r | |
33 | \r\r | |
34 | while(startPos<strLength){\r\r | |
35 | //System.out.println("StartPos:"+startPos);\r\r | |
36 | add=1;\r\r | |
37 | for(int i=0;i<ruleArray.length;i++){\r\r | |
38 | //System.out.println("Testing rule#:"+i);\r\r | |
39 | rule=(TransformationRule)ruleArray[i];\r\r | |
40 | if(rule.startsWithExp() && startPos>0)\r\r | |
41 | continue;\r\r | |
42 | if(startPos+rule.lengthOfMatch()>=strLength)\r\r | |
43 | continue;\r\r | |
44 | if(rule.isMatching(str,startPos)){\r\r | |
45 | str.replace(startPos,startPos+rule.getTakeOut(),rule.getReplaceExp());\r\r | |
46 | add=rule.getReplaceExp().length();\r\r | |
47 | strLength-=rule.getTakeOut();\r\r | |
48 | strLength+=add;\r\r | |
49 | //System.out.println("Replacing with rule#:"+i+" add="+add);\r\r | |
50 | break;\r\r | |
51 | }\r\r | |
52 | }\r\r | |
53 | startPos+=add;\r\r | |
54 | }\r\r | |
55 | return str.toString();\r\r | |
56 | }\r\r | |
57 | \r\r | |
58 | // Used to build up the transformastion table.\r\r | |
59 | private void buildRules(BufferedReader in)throws IOException{\r\r | |
60 | String read=null;\r\r | |
61 | LinkedList ruleList=new LinkedList();\r\r | |
62 | while((read=in.readLine())!=null){\r\r | |
63 | buildRule(realTrimmer(read),ruleList);\r\r | |
64 | }\r\r | |
65 | ruleArray=ruleList.toArray();\r\r | |
66 | }\r\r | |
67 | \r\r | |
68 | // Here is where the real work of reading the phonetics file is done.\r\r | |
69 | private void buildRule(String str, LinkedList ruleList){\r\r | |
70 | if(str.length()<1)\r\r | |
71 | return;\r\r | |
72 | if(str.startsWith("version"))\r\r | |
73 | return;\r\r | |
74 | \r\r | |
75 | TransformationRule rule=null;\r\r | |
76 | StringBuffer matchExp=new StringBuffer();\r\r | |
77 | StringBuffer replaceExp=new StringBuffer();\r\r | |
78 | boolean start=false, end=false;\r\r | |
79 | int takeOutPart=0, matchLength=0;\r\r | |
80 | boolean match=true, inMulti=false;\r\r | |
81 | for(int i=0;i<str.length();i++){\r\r | |
82 | if(Character.isWhitespace(str.charAt(i))){\r\r | |
83 | match=false;\r\r | |
84 | }else{\r\r | |
85 | if(match){\r\r | |
86 | if (!isReservedChar(str.charAt(i))){\r\r | |
87 | matchExp.append(str.charAt(i));\r\r | |
88 | if(!inMulti){\r\r | |
89 | takeOutPart++;\r\r | |
90 | matchLength++;\r\r | |
91 | }\r\r | |
92 | if(str.charAt(i)==STARTMULTI || str.charAt(i)==ENDMULTI)\r\r | |
93 | inMulti=!inMulti;\r\r | |
94 | }\r\r | |
95 | if (str.charAt(i)=='-')\r\r | |
96 | takeOutPart--;\r\r | |
97 | if (str.charAt(i)=='^')\r\r | |
98 | start=true;\r\r | |
99 | if (str.charAt(i)=='$')\r\r | |
100 | end=true;\r\r | |
101 | }else{\r\r | |
102 | replaceExp.append(str.charAt(i));\r\r | |
103 | }\r\r | |
104 | }\r\r | |
105 | }\r\r | |
106 | rule=new TransformationRule(matchExp.toString(), replaceExp.toString()\r\r | |
107 | , takeOutPart, matchLength, start, end);\r\r | |
108 | ruleList.add(rule);\r\r | |
109 | }\r\r | |
110 | \r\r | |
111 | // Chars with special meaning to aspell. Not everyone is implemented here.\r\r | |
112 | private boolean isReservedChar(char ch){\r\r | |
113 | if(ch=='<' || ch=='>' || ch=='^' || ch=='$' || ch=='-' || Character.isDigit(ch))\r\r | |
114 | return true;\r\r | |
115 | return false;\r\r | |
116 | }\r\r | |
117 | \r\r | |
118 | // Trims off everything we don't care about.\r\r | |
119 | private String realTrimmer(String row){\r\r | |
120 | int pos=row.indexOf('#');\r\r | |
121 | if(pos!=-1){\r\r | |
122 | row=row.substring(0,pos);\r\r | |
123 | }\r\r | |
124 | return row.trim();\r\r | |
125 | }\r\r | |
126 | \r\r | |
127 | // Inner Classes\r\r | |
128 | /*\r\r | |
129 | * Holds the match string and the replace string and all the rule attributes.\r\r | |
130 | * Is responsible for indicating matches.\r\r | |
131 | */\r\r | |
132 | private class TransformationRule{\r\r | |
133 | \r\r | |
134 | private String replace;\r\r | |
135 | private char[] match;\r\r | |
136 | // takeOut=number of chars to replace; \r\r | |
137 | // matchLength=length of matching string counting multies as one.\r\r | |
138 | private int takeOut, matchLength;\r\r | |
139 | private boolean start, end;\r\r | |
140 | \r\r | |
141 | // Construktor\r\r | |
142 | public TransformationRule(String match, String replace, int takeout\r\r | |
143 | , int matchLength, boolean start, boolean end){\r\r | |
144 | this.match=match.toCharArray();\r\r | |
145 | this.replace=replace;\r\r | |
146 | this.takeOut=takeout;\r\r | |
147 | this.matchLength=matchLength;\r\r | |
148 | this.start=start;\r\r | |
149 | this.end=end;\r\r | |
150 | }\r\r | |
151 | \r\r | |
152 | /*\r\r | |
153 | * Returns true if word from pos and forward matches the match string.\r\r | |
154 | * Precondition: wordPos+matchLength<word.length()\r\r | |
155 | */\r\r | |
156 | public boolean isMatching(StringBuffer word, int wordPos){\r\r | |
157 | boolean matching=true, inMulti=false, multiMatch=false;\r\r | |
158 | char matchCh;\r\r | |
159 | \r\r | |
160 | for(int matchPos=0;matchPos<match.length;matchPos++){\r\r | |
161 | matchCh=match[matchPos];\r\r | |
162 | if(matchCh==STARTMULTI || matchCh==ENDMULTI){\r\r | |
163 | inMulti=!inMulti;\r\r | |
164 | if(!inMulti)\r\r | |
165 | matching=matching & multiMatch;\r\r | |
166 | else\r\r | |
167 | multiMatch=false;\r\r | |
168 | }else{\r\r | |
169 | if(matchCh!=word.charAt(wordPos)){\r\r | |
170 | if(inMulti)\r\r | |
171 | multiMatch=multiMatch | false;\r\r | |
172 | else\r\r | |
173 | matching=false;\r\r | |
174 | }else{\r\r | |
175 | if(inMulti)\r\r | |
176 | multiMatch=multiMatch | true;\r\r | |
177 | else\r\r | |
178 | matching=true;\r\r | |
179 | }\r\r | |
180 | if(!inMulti)\r\r | |
181 | wordPos++;\r\r | |
182 | if(!matching)\r\r | |
183 | break;\r\r | |
184 | }\r\r | |
185 | }\r\r | |
186 | if(end && wordPos!=word.length()-1)\r\r | |
187 | matching=false;\r\r | |
188 | return matching;\r\r | |
189 | }\r\r | |
190 | \r\r | |
191 | public String getReplaceExp(){\r\r | |
192 | return replace;\r\r | |
193 | }\r\r | |
194 | \r\r | |
195 | public int getTakeOut(){\r\r | |
196 | return takeOut;\r\r | |
197 | }\r\r | |
198 | \r\r | |
199 | public boolean startsWithExp(){\r\r | |
200 | return start;\r\r | |
201 | }\r\r | |
202 | \r\r | |
203 | public int lengthOfMatch(){\r\r | |
204 | return matchLength;\r\r | |
205 | }\r\r | |
206 | \r\r | |
207 | // Just for debugging purposes.\r\r | |
208 | public String toString(){\r\r | |
209 | return "Match:"+String.valueOf(match)\r\r | |
210 | +" Replace:"+replace\r\r | |
211 | +" TakeOut:"+takeOut\r\r | |
212 | +" MatchLength:"+matchLength\r\r | |
213 | +" Start:"+start\r\r | |
214 | +" End:"+end;\r\r | |
215 | }\r\r | |
216 | \r\r | |
217 | }\r\r | |
218 | }\r\r |