--- /dev/null
+package com.swabunga.spell.engine;\r\r
+\r\r
+import java.io.*;\r\r
+import java.util.*;\r\r
+\r\r
+/** A Generic implementation of a transformator takes an aspell\r\r
+ * phonetics file and constructs some sort of transformationtable using\r\r
+ * the inner class Rule.\r\r
+ *\r\r
+ * @author Robert Gustavsson (robert@lindesign.se)\r\r
+ */\r\r
+public class GenericTransformator implements Transformator{\r\r
+ \r\r
+ public static final char STARTMULTI='(';\r\r
+ public static final char ENDMULTI=')';\r\r
+\r\r
+ Object[] ruleArray=null;\r\r
+\r\r
+ public GenericTransformator(File phonetic)throws IOException{\r\r
+ buildRules(new BufferedReader(new FileReader(phonetic)));\r\r
+ }\r\r
+\r\r
+ /**\r\r
+ * Returns the phonetic code of the word.\r\r
+ */\r\r
+ public String transform(String word) { \r\r
+ if(ruleArray==null)\r\r
+ return null;\r\r
+ TransformationRule rule;\r\r
+ StringBuffer str=new StringBuffer(word.toUpperCase());\r\r
+ int strLength=str.length();\r\r
+ int startPos=0, add=1;\r\r
+\r\r
+ while(startPos<strLength){\r\r
+ //System.out.println("StartPos:"+startPos);\r\r
+ add=1;\r\r
+ for(int i=0;i<ruleArray.length;i++){\r\r
+ //System.out.println("Testing rule#:"+i);\r\r
+ rule=(TransformationRule)ruleArray[i];\r\r
+ if(rule.startsWithExp() && startPos>0)\r\r
+ continue;\r\r
+ if(startPos+rule.lengthOfMatch()>=strLength)\r\r
+ continue;\r\r
+ if(rule.isMatching(str,startPos)){\r\r
+ str.replace(startPos,startPos+rule.getTakeOut(),rule.getReplaceExp());\r\r
+ add=rule.getReplaceExp().length();\r\r
+ strLength-=rule.getTakeOut();\r\r
+ strLength+=add;\r\r
+ //System.out.println("Replacing with rule#:"+i+" add="+add);\r\r
+ break;\r\r
+ }\r\r
+ }\r\r
+ startPos+=add;\r\r
+ }\r\r
+ return str.toString();\r\r
+ }\r\r
+\r\r
+ // Used to build up the transformastion table.\r\r
+ private void buildRules(BufferedReader in)throws IOException{\r\r
+ String read=null;\r\r
+ LinkedList ruleList=new LinkedList();\r\r
+ while((read=in.readLine())!=null){\r\r
+ buildRule(realTrimmer(read),ruleList);\r\r
+ }\r\r
+ ruleArray=ruleList.toArray();\r\r
+ }\r\r
+ \r\r
+ // Here is where the real work of reading the phonetics file is done.\r\r
+ private void buildRule(String str, LinkedList ruleList){\r\r
+ if(str.length()<1)\r\r
+ return;\r\r
+ if(str.startsWith("version"))\r\r
+ return;\r\r
+ \r\r
+ TransformationRule rule=null;\r\r
+ StringBuffer matchExp=new StringBuffer();\r\r
+ StringBuffer replaceExp=new StringBuffer();\r\r
+ boolean start=false, end=false;\r\r
+ int takeOutPart=0, matchLength=0;\r\r
+ boolean match=true, inMulti=false;\r\r
+ for(int i=0;i<str.length();i++){\r\r
+ if(Character.isWhitespace(str.charAt(i))){\r\r
+ match=false;\r\r
+ }else{\r\r
+ if(match){\r\r
+ if (!isReservedChar(str.charAt(i))){\r\r
+ matchExp.append(str.charAt(i));\r\r
+ if(!inMulti){\r\r
+ takeOutPart++;\r\r
+ matchLength++;\r\r
+ }\r\r
+ if(str.charAt(i)==STARTMULTI || str.charAt(i)==ENDMULTI)\r\r
+ inMulti=!inMulti;\r\r
+ }\r\r
+ if (str.charAt(i)=='-')\r\r
+ takeOutPart--;\r\r
+ if (str.charAt(i)=='^')\r\r
+ start=true;\r\r
+ if (str.charAt(i)=='$')\r\r
+ end=true;\r\r
+ }else{\r\r
+ replaceExp.append(str.charAt(i));\r\r
+ }\r\r
+ }\r\r
+ }\r\r
+ rule=new TransformationRule(matchExp.toString(), replaceExp.toString()\r\r
+ , takeOutPart, matchLength, start, end);\r\r
+ ruleList.add(rule);\r\r
+ }\r\r
+ \r\r
+ // Chars with special meaning to aspell. Not everyone is implemented here.\r\r
+ private boolean isReservedChar(char ch){\r\r
+ if(ch=='<' || ch=='>' || ch=='^' || ch=='$' || ch=='-' || Character.isDigit(ch))\r\r
+ return true;\r\r
+ return false;\r\r
+ }\r\r
+\r\r
+ // Trims off everything we don't care about.\r\r
+ private String realTrimmer(String row){\r\r
+ int pos=row.indexOf('#');\r\r
+ if(pos!=-1){\r\r
+ row=row.substring(0,pos);\r\r
+ }\r\r
+ return row.trim();\r\r
+ }\r\r
+\r\r
+ // Inner Classes\r\r
+ /*\r\r
+ * Holds the match string and the replace string and all the rule attributes.\r\r
+ * Is responsible for indicating matches.\r\r
+ */\r\r
+ private class TransformationRule{\r\r
+\r\r
+ private String replace;\r\r
+ private char[] match;\r\r
+ // takeOut=number of chars to replace; \r\r
+ // matchLength=length of matching string counting multies as one.\r\r
+ private int takeOut, matchLength;\r\r
+ private boolean start, end;\r\r
+\r\r
+ // Construktor\r\r
+ public TransformationRule(String match, String replace, int takeout\r\r
+ , int matchLength, boolean start, boolean end){\r\r
+ this.match=match.toCharArray();\r\r
+ this.replace=replace;\r\r
+ this.takeOut=takeout;\r\r
+ this.matchLength=matchLength;\r\r
+ this.start=start;\r\r
+ this.end=end;\r\r
+ }\r\r
+\r\r
+ /*\r\r
+ * Returns true if word from pos and forward matches the match string.\r\r
+ * Precondition: wordPos+matchLength<word.length()\r\r
+ */\r\r
+ public boolean isMatching(StringBuffer word, int wordPos){\r\r
+ boolean matching=true, inMulti=false, multiMatch=false;\r\r
+ char matchCh;\r\r
+ \r\r
+ for(int matchPos=0;matchPos<match.length;matchPos++){\r\r
+ matchCh=match[matchPos];\r\r
+ if(matchCh==STARTMULTI || matchCh==ENDMULTI){\r\r
+ inMulti=!inMulti;\r\r
+ if(!inMulti)\r\r
+ matching=matching & multiMatch;\r\r
+ else\r\r
+ multiMatch=false;\r\r
+ }else{\r\r
+ if(matchCh!=word.charAt(wordPos)){\r\r
+ if(inMulti)\r\r
+ multiMatch=multiMatch | false;\r\r
+ else\r\r
+ matching=false;\r\r
+ }else{\r\r
+ if(inMulti)\r\r
+ multiMatch=multiMatch | true;\r\r
+ else\r\r
+ matching=true;\r\r
+ }\r\r
+ if(!inMulti)\r\r
+ wordPos++;\r\r
+ if(!matching)\r\r
+ break;\r\r
+ }\r\r
+ }\r\r
+ if(end && wordPos!=word.length()-1)\r\r
+ matching=false;\r\r
+ return matching;\r\r
+ }\r\r
+\r\r
+ public String getReplaceExp(){\r\r
+ return replace;\r\r
+ }\r\r
+\r\r
+ public int getTakeOut(){\r\r
+ return takeOut;\r\r
+ }\r\r
+\r\r
+ public boolean startsWithExp(){\r\r
+ return start;\r\r
+ }\r\r
+ \r\r
+ public int lengthOfMatch(){\r\r
+ return matchLength;\r\r
+ }\r\r
+ \r\r
+ // Just for debugging purposes.\r\r
+ public String toString(){\r\r
+ return "Match:"+String.valueOf(match)\r\r
+ +" Replace:"+replace\r\r
+ +" TakeOut:"+takeOut\r\r
+ +" MatchLength:"+matchLength\r\r
+ +" Start:"+start\r\r
+ +" End:"+end;\r\r
+ }\r\r
+\r\r
+ }\r\r
+}\r\r