X-Git-Url: http://git.polytechnique.org/?a=blobdiff_plain;f=ekit%2Fcom%2Fswabunga%2Fspell%2Fengine%2FGenericTransformator.java;fp=ekit%2Fcom%2Fswabunga%2Fspell%2Fengine%2FGenericTransformator.java;h=c0abb66edbab59d9bd485941fbf7c8ff166f73dd;hb=6dd702802493556cff5e55eb4982d23e79a30832;hp=0000000000000000000000000000000000000000;hpb=c47fa64d719ab7f5062a87fbf5cc55a7916c5c53;p=old-projects.git

diff --git a/ekit/com/swabunga/spell/engine/GenericTransformator.java b/ekit/com/swabunga/spell/engine/GenericTransformator.java
new file mode 100644
index 0000000..c0abb66
--- /dev/null
+++ b/ekit/com/swabunga/spell/engine/GenericTransformator.java
@@ -0,0 +1,218 @@
+package  com.swabunga.spell.engine;
+
+import java.io.*;
+import java.util.*;
+
+/** A Generic implementation of a transformator takes an aspell
+ *  phonetics file and constructs some sort of transformationtable using
+ *  the inner class Rule.
+ *
+ * @author Robert Gustavsson (robert@lindesign.se)
+ */
+public class GenericTransformator implements Transformator{
+    
+    public static final char STARTMULTI='(';
+    public static final char ENDMULTI=')';
+
+    Object[] ruleArray=null;
+
+    public GenericTransformator(File phonetic)throws IOException{
+        buildRules(new BufferedReader(new FileReader(phonetic)));
+    }
+
+    /**
+    * Returns the phonetic code of the word.
+    */
+    public String transform(String word) {       
+        if(ruleArray==null)
+            return null;
+        TransformationRule rule;
+        StringBuffer str=new StringBuffer(word.toUpperCase());
+        int strLength=str.length();
+        int startPos=0, add=1;
+
+        while(startPos<strLength){
+            //System.out.println("StartPos:"+startPos);
+            add=1;
+            for(int i=0;i<ruleArray.length;i++){
+                //System.out.println("Testing rule#:"+i);
+                rule=(TransformationRule)ruleArray[i];
+                if(rule.startsWithExp() && startPos>0)
+                    continue;
+                if(startPos+rule.lengthOfMatch()>=strLength)
+                    continue;
+                if(rule.isMatching(str,startPos)){
+                    str.replace(startPos,startPos+rule.getTakeOut(),rule.getReplaceExp());
+                    add=rule.getReplaceExp().length();
+                    strLength-=rule.getTakeOut();
+                    strLength+=add;
+                    //System.out.println("Replacing with rule#:"+i+" add="+add);
+                    break;
+                }
+            }
+            startPos+=add;
+        }
+        return str.toString();
+    }
+
+    // Used to build up the transformastion table.
+    private void buildRules(BufferedReader in)throws IOException{
+        String read=null;
+        LinkedList ruleList=new LinkedList();
+        while((read=in.readLine())!=null){
+            buildRule(realTrimmer(read),ruleList);
+        }
+        ruleArray=ruleList.toArray();
+    }
+    
+    // Here is where the real work of reading the phonetics file is done.
+    private void buildRule(String str, LinkedList ruleList){
+        if(str.length()<1)
+            return;
+        if(str.startsWith("version"))
+            return;
+        
+        TransformationRule rule=null;
+        StringBuffer matchExp=new StringBuffer();
+        StringBuffer replaceExp=new StringBuffer();
+        boolean start=false, end=false;
+        int takeOutPart=0, matchLength=0;
+        boolean match=true, inMulti=false;
+        for(int i=0;i<str.length();i++){
+            if(Character.isWhitespace(str.charAt(i))){
+                match=false;
+            }else{
+                if(match){
+                    if (!isReservedChar(str.charAt(i))){
+                        matchExp.append(str.charAt(i));
+                        if(!inMulti){
+                            takeOutPart++;
+                            matchLength++;
+                        }
+                        if(str.charAt(i)==STARTMULTI || str.charAt(i)==ENDMULTI)
+                            inMulti=!inMulti;
+                    }
+                    if (str.charAt(i)=='-')
+                        takeOutPart--;
+                    if (str.charAt(i)=='^')
+                        start=true;
+                    if (str.charAt(i)=='$')
+                        end=true;
+                }else{
+                    replaceExp.append(str.charAt(i));
+                }
+            }
+        }
+        rule=new TransformationRule(matchExp.toString(), replaceExp.toString()
+                                        , takeOutPart, matchLength, start, end);
+        ruleList.add(rule);
+    }
+    
+    // Chars with special meaning to aspell. Not everyone is implemented here.
+    private boolean isReservedChar(char ch){
+        if(ch=='<' || ch=='>' || ch=='^' || ch=='$' || ch=='-' || Character.isDigit(ch))
+            return true;
+        return false;
+    }
+
+    // Trims off everything we don't care about.
+    private String realTrimmer(String row){
+        int pos=row.indexOf('#');
+        if(pos!=-1){
+            row=row.substring(0,pos);
+        }
+        return row.trim();
+    }
+
+    // Inner Classes
+    /*
+    * Holds the match string and the replace string and all the rule attributes.
+    * Is responsible for indicating matches.
+    */
+    private class TransformationRule{
+
+        private String replace;
+        private char[] match;
+        // takeOut=number of chars to replace; 
+        // matchLength=length of matching string counting multies as one.
+        private int takeOut, matchLength;
+        private boolean start, end;
+
+        // Construktor
+        public TransformationRule(String match, String replace, int takeout
+                                  , int matchLength, boolean start, boolean end){
+            this.match=match.toCharArray();
+            this.replace=replace;
+            this.takeOut=takeout;
+            this.matchLength=matchLength;
+            this.start=start;
+            this.end=end;
+        }
+
+        /*
+        * Returns true if word from pos and forward matches the match string.
+        * Precondition: wordPos+matchLength<word.length()
+        */
+        public boolean isMatching(StringBuffer word, int wordPos){
+            boolean matching=true, inMulti=false, multiMatch=false;
+            char matchCh;
+            
+            for(int matchPos=0;matchPos<match.length;matchPos++){
+                matchCh=match[matchPos];
+                if(matchCh==STARTMULTI || matchCh==ENDMULTI){
+                    inMulti=!inMulti;
+                    if(!inMulti)
+                        matching=matching & multiMatch;
+                    else
+                        multiMatch=false;
+                }else{
+                    if(matchCh!=word.charAt(wordPos)){
+                        if(inMulti)
+                            multiMatch=multiMatch | false;
+                        else
+                            matching=false;
+                    }else{
+                        if(inMulti)
+                            multiMatch=multiMatch | true;
+                        else
+                            matching=true;
+                    }
+                    if(!inMulti)
+                        wordPos++;
+                    if(!matching)
+                        break;
+                }
+            }
+            if(end && wordPos!=word.length()-1)
+                matching=false;
+            return matching;
+        }
+
+        public String getReplaceExp(){
+            return  replace;
+        }
+
+        public int getTakeOut(){
+            return takeOut;
+        }
+
+        public boolean startsWithExp(){
+            return start;
+        }
+        
+        public int lengthOfMatch(){
+            return matchLength;
+        }
+        
+        // Just for debugging purposes.
+        public String toString(){
+            return "Match:"+String.valueOf(match)
+                   +" Replace:"+replace
+                   +" TakeOut:"+takeOut
+                   +" MatchLength:"+matchLength
+                   +" Start:"+start
+                   +" End:"+end;
+        }
+
+    }
+}