Initial revision
[old-projects.git] / ekit / com / swabunga / spell / engine / GenericTransformator.java
1 package com.swabunga.spell.engine;
2
3 import java.io.*;
4 import java.util.*;
5
6 /** A Generic implementation of a transformator takes an aspell
7 * phonetics file and constructs some sort of transformationtable using
8 * the inner class Rule.
9 *
10 * @author Robert Gustavsson (robert@lindesign.se)
11 */
12 public class GenericTransformator implements Transformator{
13
14 public static final char STARTMULTI='(';
15 public static final char ENDMULTI=')';
16
17 Object[] ruleArray=null;
18
19 public GenericTransformator(File phonetic)throws IOException{
20 buildRules(new BufferedReader(new FileReader(phonetic)));
21 }
22
23 /**
24 * Returns the phonetic code of the word.
25 */
26 public String transform(String word) {
27 if(ruleArray==null)
28 return null;
29 TransformationRule rule;
30 StringBuffer str=new StringBuffer(word.toUpperCase());
31 int strLength=str.length();
32 int startPos=0, add=1;
33
34 while(startPos<strLength){
35 //System.out.println("StartPos:"+startPos);
36 add=1;
37 for(int i=0;i<ruleArray.length;i++){
38 //System.out.println("Testing rule#:"+i);
39 rule=(TransformationRule)ruleArray[i];
40 if(rule.startsWithExp() && startPos>0)
41 continue;
42 if(startPos+rule.lengthOfMatch()>=strLength)
43 continue;
44 if(rule.isMatching(str,startPos)){
45 str.replace(startPos,startPos+rule.getTakeOut(),rule.getReplaceExp());
46 add=rule.getReplaceExp().length();
47 strLength-=rule.getTakeOut();
48 strLength+=add;
49 //System.out.println("Replacing with rule#:"+i+" add="+add);
50 break;
51 }
52 }
53 startPos+=add;
54 }
55 return str.toString();
56 }
57
58 // Used to build up the transformastion table.
59 private void buildRules(BufferedReader in)throws IOException{
60 String read=null;
61 LinkedList ruleList=new LinkedList();
62 while((read=in.readLine())!=null){
63 buildRule(realTrimmer(read),ruleList);
64 }
65 ruleArray=ruleList.toArray();
66 }
67
68 // Here is where the real work of reading the phonetics file is done.
69 private void buildRule(String str, LinkedList ruleList){
70 if(str.length()<1)
71 return;
72 if(str.startsWith("version"))
73 return;
74
75 TransformationRule rule=null;
76 StringBuffer matchExp=new StringBuffer();
77 StringBuffer replaceExp=new StringBuffer();
78 boolean start=false, end=false;
79 int takeOutPart=0, matchLength=0;
80 boolean match=true, inMulti=false;
81 for(int i=0;i<str.length();i++){
82 if(Character.isWhitespace(str.charAt(i))){
83 match=false;
84 }else{
85 if(match){
86 if (!isReservedChar(str.charAt(i))){
87 matchExp.append(str.charAt(i));
88 if(!inMulti){
89 takeOutPart++;
90 matchLength++;
91 }
92 if(str.charAt(i)==STARTMULTI || str.charAt(i)==ENDMULTI)
93 inMulti=!inMulti;
94 }
95 if (str.charAt(i)=='-')
96 takeOutPart--;
97 if (str.charAt(i)=='^')
98 start=true;
99 if (str.charAt(i)=='$')
100 end=true;
101 }else{
102 replaceExp.append(str.charAt(i));
103 }
104 }
105 }
106 rule=new TransformationRule(matchExp.toString(), replaceExp.toString()
107 , takeOutPart, matchLength, start, end);
108 ruleList.add(rule);
109 }
110
111 // Chars with special meaning to aspell. Not everyone is implemented here.
112 private boolean isReservedChar(char ch){
113 if(ch=='<' || ch=='>' || ch=='^' || ch=='$' || ch=='-' || Character.isDigit(ch))
114 return true;
115 return false;
116 }
117
118 // Trims off everything we don't care about.
119 private String realTrimmer(String row){
120 int pos=row.indexOf('#');
121 if(pos!=-1){
122 row=row.substring(0,pos);
123 }
124 return row.trim();
125 }
126
127 // Inner Classes
128 /*
129 * Holds the match string and the replace string and all the rule attributes.
130 * Is responsible for indicating matches.
131 */
132 private class TransformationRule{
133
134 private String replace;
135 private char[] match;
136 // takeOut=number of chars to replace;
137 // matchLength=length of matching string counting multies as one.
138 private int takeOut, matchLength;
139 private boolean start, end;
140
141 // Construktor
142 public TransformationRule(String match, String replace, int takeout
143 , int matchLength, boolean start, boolean end){
144 this.match=match.toCharArray();
145 this.replace=replace;
146 this.takeOut=takeout;
147 this.matchLength=matchLength;
148 this.start=start;
149 this.end=end;
150 }
151
152 /*
153 * Returns true if word from pos and forward matches the match string.
154 * Precondition: wordPos+matchLength<word.length()
155 */
156 public boolean isMatching(StringBuffer word, int wordPos){
157 boolean matching=true, inMulti=false, multiMatch=false;
158 char matchCh;
159
160 for(int matchPos=0;matchPos<match.length;matchPos++){
161 matchCh=match[matchPos];
162 if(matchCh==STARTMULTI || matchCh==ENDMULTI){
163 inMulti=!inMulti;
164 if(!inMulti)
165 matching=matching & multiMatch;
166 else
167 multiMatch=false;
168 }else{
169 if(matchCh!=word.charAt(wordPos)){
170 if(inMulti)
171 multiMatch=multiMatch | false;
172 else
173 matching=false;
174 }else{
175 if(inMulti)
176 multiMatch=multiMatch | true;
177 else
178 matching=true;
179 }
180 if(!inMulti)
181 wordPos++;
182 if(!matching)
183 break;
184 }
185 }
186 if(end && wordPos!=word.length()-1)
187 matching=false;
188 return matching;
189 }
190
191 public String getReplaceExp(){
192 return replace;
193 }
194
195 public int getTakeOut(){
196 return takeOut;
197 }
198
199 public boolean startsWithExp(){
200 return start;
201 }
202
203 public int lengthOfMatch(){
204 return matchLength;
205 }
206
207 // Just for debugging purposes.
208 public String toString(){
209 return "Match:"+String.valueOf(match)
210 +" Replace:"+replace
211 +" TakeOut:"+takeOut
212 +" MatchLength:"+matchLength
213 +" Start:"+start
214 +" End:"+end;
215 }
216
217 }
218 }