| 1 | package com.swabunga.spell.engine;\r\r |
| 2 | \r\r |
| 3 | import java.io.*;\r\r |
| 4 | import java.util.*;\r\r |
| 5 | \r\r |
| 6 | /** A Generic implementation of a transformator takes an aspell\r\r |
| 7 | * phonetics file and constructs some sort of transformationtable using\r\r |
| 8 | * the inner class Rule.\r\r |
| 9 | *\r\r |
| 10 | * @author Robert Gustavsson (robert@lindesign.se)\r\r |
| 11 | */\r\r |
| 12 | public class GenericTransformator implements Transformator{\r\r |
| 13 | \r\r |
| 14 | public static final char STARTMULTI='(';\r\r |
| 15 | public static final char ENDMULTI=')';\r\r |
| 16 | \r\r |
| 17 | Object[] ruleArray=null;\r\r |
| 18 | \r\r |
| 19 | public GenericTransformator(File phonetic)throws IOException{\r\r |
| 20 | buildRules(new BufferedReader(new FileReader(phonetic)));\r\r |
| 21 | }\r\r |
| 22 | \r\r |
| 23 | /**\r\r |
| 24 | * Returns the phonetic code of the word.\r\r |
| 25 | */\r\r |
| 26 | public String transform(String word) { \r\r |
| 27 | if(ruleArray==null)\r\r |
| 28 | return null;\r\r |
| 29 | TransformationRule rule;\r\r |
| 30 | StringBuffer str=new StringBuffer(word.toUpperCase());\r\r |
| 31 | int strLength=str.length();\r\r |
| 32 | int startPos=0, add=1;\r\r |
| 33 | \r\r |
| 34 | while(startPos<strLength){\r\r |
| 35 | //System.out.println("StartPos:"+startPos);\r\r |
| 36 | add=1;\r\r |
| 37 | for(int i=0;i<ruleArray.length;i++){\r\r |
| 38 | //System.out.println("Testing rule#:"+i);\r\r |
| 39 | rule=(TransformationRule)ruleArray[i];\r\r |
| 40 | if(rule.startsWithExp() && startPos>0)\r\r |
| 41 | continue;\r\r |
| 42 | if(startPos+rule.lengthOfMatch()>=strLength)\r\r |
| 43 | continue;\r\r |
| 44 | if(rule.isMatching(str,startPos)){\r\r |
| 45 | str.replace(startPos,startPos+rule.getTakeOut(),rule.getReplaceExp());\r\r |
| 46 | add=rule.getReplaceExp().length();\r\r |
| 47 | strLength-=rule.getTakeOut();\r\r |
| 48 | strLength+=add;\r\r |
| 49 | //System.out.println("Replacing with rule#:"+i+" add="+add);\r\r |
| 50 | break;\r\r |
| 51 | }\r\r |
| 52 | }\r\r |
| 53 | startPos+=add;\r\r |
| 54 | }\r\r |
| 55 | return str.toString();\r\r |
| 56 | }\r\r |
| 57 | \r\r |
| 58 | // Used to build up the transformastion table.\r\r |
| 59 | private void buildRules(BufferedReader in)throws IOException{\r\r |
| 60 | String read=null;\r\r |
| 61 | LinkedList ruleList=new LinkedList();\r\r |
| 62 | while((read=in.readLine())!=null){\r\r |
| 63 | buildRule(realTrimmer(read),ruleList);\r\r |
| 64 | }\r\r |
| 65 | ruleArray=ruleList.toArray();\r\r |
| 66 | }\r\r |
| 67 | \r\r |
| 68 | // Here is where the real work of reading the phonetics file is done.\r\r |
| 69 | private void buildRule(String str, LinkedList ruleList){\r\r |
| 70 | if(str.length()<1)\r\r |
| 71 | return;\r\r |
| 72 | if(str.startsWith("version"))\r\r |
| 73 | return;\r\r |
| 74 | \r\r |
| 75 | TransformationRule rule=null;\r\r |
| 76 | StringBuffer matchExp=new StringBuffer();\r\r |
| 77 | StringBuffer replaceExp=new StringBuffer();\r\r |
| 78 | boolean start=false, end=false;\r\r |
| 79 | int takeOutPart=0, matchLength=0;\r\r |
| 80 | boolean match=true, inMulti=false;\r\r |
| 81 | for(int i=0;i<str.length();i++){\r\r |
| 82 | if(Character.isWhitespace(str.charAt(i))){\r\r |
| 83 | match=false;\r\r |
| 84 | }else{\r\r |
| 85 | if(match){\r\r |
| 86 | if (!isReservedChar(str.charAt(i))){\r\r |
| 87 | matchExp.append(str.charAt(i));\r\r |
| 88 | if(!inMulti){\r\r |
| 89 | takeOutPart++;\r\r |
| 90 | matchLength++;\r\r |
| 91 | }\r\r |
| 92 | if(str.charAt(i)==STARTMULTI || str.charAt(i)==ENDMULTI)\r\r |
| 93 | inMulti=!inMulti;\r\r |
| 94 | }\r\r |
| 95 | if (str.charAt(i)=='-')\r\r |
| 96 | takeOutPart--;\r\r |
| 97 | if (str.charAt(i)=='^')\r\r |
| 98 | start=true;\r\r |
| 99 | if (str.charAt(i)=='$')\r\r |
| 100 | end=true;\r\r |
| 101 | }else{\r\r |
| 102 | replaceExp.append(str.charAt(i));\r\r |
| 103 | }\r\r |
| 104 | }\r\r |
| 105 | }\r\r |
| 106 | rule=new TransformationRule(matchExp.toString(), replaceExp.toString()\r\r |
| 107 | , takeOutPart, matchLength, start, end);\r\r |
| 108 | ruleList.add(rule);\r\r |
| 109 | }\r\r |
| 110 | \r\r |
| 111 | // Chars with special meaning to aspell. Not everyone is implemented here.\r\r |
| 112 | private boolean isReservedChar(char ch){\r\r |
| 113 | if(ch=='<' || ch=='>' || ch=='^' || ch=='$' || ch=='-' || Character.isDigit(ch))\r\r |
| 114 | return true;\r\r |
| 115 | return false;\r\r |
| 116 | }\r\r |
| 117 | \r\r |
| 118 | // Trims off everything we don't care about.\r\r |
| 119 | private String realTrimmer(String row){\r\r |
| 120 | int pos=row.indexOf('#');\r\r |
| 121 | if(pos!=-1){\r\r |
| 122 | row=row.substring(0,pos);\r\r |
| 123 | }\r\r |
| 124 | return row.trim();\r\r |
| 125 | }\r\r |
| 126 | \r\r |
| 127 | // Inner Classes\r\r |
| 128 | /*\r\r |
| 129 | * Holds the match string and the replace string and all the rule attributes.\r\r |
| 130 | * Is responsible for indicating matches.\r\r |
| 131 | */\r\r |
| 132 | private class TransformationRule{\r\r |
| 133 | \r\r |
| 134 | private String replace;\r\r |
| 135 | private char[] match;\r\r |
| 136 | // takeOut=number of chars to replace; \r\r |
| 137 | // matchLength=length of matching string counting multies as one.\r\r |
| 138 | private int takeOut, matchLength;\r\r |
| 139 | private boolean start, end;\r\r |
| 140 | \r\r |
| 141 | // Construktor\r\r |
| 142 | public TransformationRule(String match, String replace, int takeout\r\r |
| 143 | , int matchLength, boolean start, boolean end){\r\r |
| 144 | this.match=match.toCharArray();\r\r |
| 145 | this.replace=replace;\r\r |
| 146 | this.takeOut=takeout;\r\r |
| 147 | this.matchLength=matchLength;\r\r |
| 148 | this.start=start;\r\r |
| 149 | this.end=end;\r\r |
| 150 | }\r\r |
| 151 | \r\r |
| 152 | /*\r\r |
| 153 | * Returns true if word from pos and forward matches the match string.\r\r |
| 154 | * Precondition: wordPos+matchLength<word.length()\r\r |
| 155 | */\r\r |
| 156 | public boolean isMatching(StringBuffer word, int wordPos){\r\r |
| 157 | boolean matching=true, inMulti=false, multiMatch=false;\r\r |
| 158 | char matchCh;\r\r |
| 159 | \r\r |
| 160 | for(int matchPos=0;matchPos<match.length;matchPos++){\r\r |
| 161 | matchCh=match[matchPos];\r\r |
| 162 | if(matchCh==STARTMULTI || matchCh==ENDMULTI){\r\r |
| 163 | inMulti=!inMulti;\r\r |
| 164 | if(!inMulti)\r\r |
| 165 | matching=matching & multiMatch;\r\r |
| 166 | else\r\r |
| 167 | multiMatch=false;\r\r |
| 168 | }else{\r\r |
| 169 | if(matchCh!=word.charAt(wordPos)){\r\r |
| 170 | if(inMulti)\r\r |
| 171 | multiMatch=multiMatch | false;\r\r |
| 172 | else\r\r |
| 173 | matching=false;\r\r |
| 174 | }else{\r\r |
| 175 | if(inMulti)\r\r |
| 176 | multiMatch=multiMatch | true;\r\r |
| 177 | else\r\r |
| 178 | matching=true;\r\r |
| 179 | }\r\r |
| 180 | if(!inMulti)\r\r |
| 181 | wordPos++;\r\r |
| 182 | if(!matching)\r\r |
| 183 | break;\r\r |
| 184 | }\r\r |
| 185 | }\r\r |
| 186 | if(end && wordPos!=word.length()-1)\r\r |
| 187 | matching=false;\r\r |
| 188 | return matching;\r\r |
| 189 | }\r\r |
| 190 | \r\r |
| 191 | public String getReplaceExp(){\r\r |
| 192 | return replace;\r\r |
| 193 | }\r\r |
| 194 | \r\r |
| 195 | public int getTakeOut(){\r\r |
| 196 | return takeOut;\r\r |
| 197 | }\r\r |
| 198 | \r\r |
| 199 | public boolean startsWithExp(){\r\r |
| 200 | return start;\r\r |
| 201 | }\r\r |
| 202 | \r\r |
| 203 | public int lengthOfMatch(){\r\r |
| 204 | return matchLength;\r\r |
| 205 | }\r\r |
| 206 | \r\r |
| 207 | // Just for debugging purposes.\r\r |
| 208 | public String toString(){\r\r |
| 209 | return "Match:"+String.valueOf(match)\r\r |
| 210 | +" Replace:"+replace\r\r |
| 211 | +" TakeOut:"+takeOut\r\r |
| 212 | +" MatchLength:"+matchLength\r\r |
| 213 | +" Start:"+start\r\r |
| 214 | +" End:"+end;\r\r |
| 215 | }\r\r |
| 216 | \r\r |
| 217 | }\r\r |
| 218 | }\r\r |