ekit/com/swabunga/spell/engine/GenericTransformator.java

   1 package  com.swabunga.spell.engine;
   2
   3 import java.io.*;
   4 import java.util.*;
   5
/** A generic implementation of a transformator that takes an aspell
 *  phonetics file and builds a transformation table from it, using
 *  the inner class TransformationRule.
 *
 * @author Robert Gustavsson (robert@lindesign.se)
 */
  12 public class GenericTransformator implements Transformator{
  13
  14     public static final char STARTMULTI='(';
  15     public static final char ENDMULTI=')';
  16
  17     Object[] ruleArray=null;
  18
  19     public GenericTransformator(File phonetic)throws IOException{
  20         buildRules(new BufferedReader(new FileReader(phonetic)));
  21     }
  22
  23     /**
  24     * Returns the phonetic code of the word.
  25     */
  26     public String transform(String word) {
  27         if(ruleArray==null)
  28             return null;
  29         TransformationRule rule;
  30         StringBuffer str=new StringBuffer(word.toUpperCase());
  31         int strLength=str.length();
  32         int startPos=0, add=1;
  33
  34         while(startPos<strLength){
  35             //System.out.println("StartPos:"+startPos);
  36             add=1;
  37             for(int i=0;i<ruleArray.length;i++){
  38                 //System.out.println("Testing rule#:"+i);
  39                 rule=(TransformationRule)ruleArray[i];
  40                 if(rule.startsWithExp() && startPos>0)
  41                     continue;
  42                 if(startPos+rule.lengthOfMatch()>=strLength)
  43                     continue;
  44                 if(rule.isMatching(str,startPos)){
  45                     str.replace(startPos,startPos+rule.getTakeOut(),rule.getReplaceExp());
  46                     add=rule.getReplaceExp().length();
  47                     strLength-=rule.getTakeOut();
  48                     strLength+=add;
  49                     //System.out.println("Replacing with rule#:"+i+" add="+add);
  50                     break;
  51                 }
  52             }
  53             startPos+=add;
  54         }
  55         return str.toString();
  56     }
  57
  58     // Used to build up the transformastion table.
  59     private void buildRules(BufferedReader in)throws IOException{
  60         String read=null;
  61         LinkedList ruleList=new LinkedList();
  62         while((read=in.readLine())!=null){
  63             buildRule(realTrimmer(read),ruleList);
  64         }
  65         ruleArray=ruleList.toArray();
  66     }
  67
  68     // Here is where the real work of reading the phonetics file is done.
  69     private void buildRule(String str, LinkedList ruleList){
  70         if(str.length()<1)
  71             return;
  72         if(str.startsWith("version"))
  73             return;
  74
  75         TransformationRule rule=null;
  76         StringBuffer matchExp=new StringBuffer();
  77         StringBuffer replaceExp=new StringBuffer();
  78         boolean start=false, end=false;
  79         int takeOutPart=0, matchLength=0;
  80         boolean match=true, inMulti=false;
  81         for(int i=0;i<str.length();i++){
  82             if(Character.isWhitespace(str.charAt(i))){
  83                 match=false;
  84             }else{
  85                 if(match){
  86                     if (!isReservedChar(str.charAt(i))){
  87                         matchExp.append(str.charAt(i));
  88                         if(!inMulti){
  89                             takeOutPart++;
  90                             matchLength++;
  91                         }
  92                         if(str.charAt(i)==STARTMULTI || str.charAt(i)==ENDMULTI)
  93                             inMulti=!inMulti;
  94                     }
  95                     if (str.charAt(i)=='-')
  96                         takeOutPart--;
  97                     if (str.charAt(i)=='^')
  98                         start=true;
  99                     if (str.charAt(i)=='$')
 100                         end=true;
 101                 }else{
 102                     replaceExp.append(str.charAt(i));
 103                 }
 104             }
 105         }
 106         rule=new TransformationRule(matchExp.toString(), replaceExp.toString()
 107                                         , takeOutPart, matchLength, start, end);
 108         ruleList.add(rule);
 109     }
 110
 111     // Chars with special meaning to aspell. Not everyone is implemented here.
 112     private boolean isReservedChar(char ch){
 113         if(ch=='<' || ch=='>' || ch=='^' || ch=='$' || ch=='-' || Character.isDigit(ch))
 114             return true;
 115         return false;
 116     }
 117
 118     // Trims off everything we don't care about.
 119     private String realTrimmer(String row){
 120         int pos=row.indexOf('#');
 121         if(pos!=-1){
 122             row=row.substring(0,pos);
 123         }
 124         return row.trim();
 125     }
 126
 127     // Inner Classes
 128     /*
 129     * Holds the match string and the replace string and all the rule attributes.
 130     * Is responsible for indicating matches.
 131     */
 132     private class TransformationRule{
 133
 134         private String replace;
 135         private char[] match;
 136         // takeOut=number of chars to replace;
 137         // matchLength=length of matching string counting multies as one.
 138         private int takeOut, matchLength;
 139         private boolean start, end;
 140
 141         // Construktor
 142         public TransformationRule(String match, String replace, int takeout
 143                                   , int matchLength, boolean start, boolean end){
 144             this.match=match.toCharArray();
 145             this.replace=replace;
 146             this.takeOut=takeout;
 147             this.matchLength=matchLength;
 148             this.start=start;
 149             this.end=end;
 150         }
 151
 152         /*
 153         * Returns true if word from pos and forward matches the match string.
 154         * Precondition: wordPos+matchLength<word.length()
 155         */
 156         public boolean isMatching(StringBuffer word, int wordPos){
 157             boolean matching=true, inMulti=false, multiMatch=false;
 158             char matchCh;
 159
 160             for(int matchPos=0;matchPos<match.length;matchPos++){
 161                 matchCh=match[matchPos];
 162                 if(matchCh==STARTMULTI || matchCh==ENDMULTI){
 163                     inMulti=!inMulti;
 164                     if(!inMulti)
 165                         matching=matching & multiMatch;
 166                     else
 167                         multiMatch=false;
 168                 }else{
 169                     if(matchCh!=word.charAt(wordPos)){
 170                         if(inMulti)
 171                             multiMatch=multiMatch | false;
 172                         else
 173                             matching=false;
 174                     }else{
 175                         if(inMulti)
 176                             multiMatch=multiMatch | true;
 177                         else
 178                             matching=true;
 179                     }
 180                     if(!inMulti)
 181                         wordPos++;
 182                     if(!matching)
 183                         break;
 184                 }
 185             }
 186             if(end && wordPos!=word.length()-1)
 187                 matching=false;
 188             return matching;
 189         }
 190
 191         public String getReplaceExp(){
 192             return  replace;
 193         }
 194
 195         public int getTakeOut(){
 196             return takeOut;
 197         }
 198
 199         public boolean startsWithExp(){
 200             return start;
 201         }
 202
 203         public int lengthOfMatch(){
 204             return matchLength;
 205         }
 206
 207         // Just for debugging purposes.
 208         public String toString(){
 209             return "Match:"+String.valueOf(match)
 210                    +" Replace:"+replace
 211                    +" TakeOut:"+takeOut
 212                    +" MatchLength:"+matchLength
 213                    +" Start:"+start
 214                    +" End:"+end;
 215         }
 216
 217     }
 218 }