Initial revision

[old-projects.git] / ekit / com / swabunga / spell / engine / GenericTransformator.java
diff --git a/ekit/com/swabunga/spell/engine/GenericTransformator.java b/ekit/com/swabunga/spell/engine/GenericTransformator.java

new file mode 100644 (file)

index 0000000..c0abb66
--- /dev/null
+++ b/ekit/com/swabunga/spell/engine/GenericTransformator.java
@@ -0,0 +1,218 @@
+package  com.swabunga.spell.engine;\r\r
+\r\r
+import java.io.*;\r\r
+import java.util.*;\r\r
+\r\r
+/**\r\r
+ * A generic {@link Transformator} that reads an aspell phonetics file and\r\r
+ * builds a transformation table of rules from it (see the nested class\r\r
+ * TransformationRule). The rules are applied, in file order, to turn a word\r\r
+ * into its phonetic code.\r\r
+ *\r\r
+ * @author Robert Gustavsson (robert@lindesign.se)\r\r
+ */\r\r
+public class GenericTransformator implements Transformator{\r\r
+\r\r
+    /** Opens a group of alternative characters in a match expression. */\r\r
+    public static final char STARTMULTI='(';\r\r
+    /** Closes a group of alternative characters in a match expression. */\r\r
+    public static final char ENDMULTI=')';\r\r
+\r\r
+    /** The parsed rules, in the order they appear in the phonetics file. */\r\r
+    Object[] ruleArray=null;\r\r
+\r\r
+    /**\r\r
+     * Builds the rule table from the given phonetics file.\r\r
+     *\r\r
+     * @param phonetic the aspell phonetics file to read\r\r
+     * @throws IOException if the file cannot be opened or read\r\r
+     */\r\r
+    public GenericTransformator(File phonetic)throws IOException{\r\r
+        BufferedReader in=new BufferedReader(new FileReader(phonetic));\r\r
+        try{\r\r
+            buildRules(in);\r\r
+        }finally{\r\r
+            // Always release the file handle, even when reading fails.\r\r
+            in.close();\r\r
+        }\r\r
+    }\r\r
+\r\r
+    /**\r\r
+     * Returns the phonetic code of the word.\r\r
+     *\r\r
+     * The word is upper-cased and scanned from left to right. At each position\r\r
+     * the first matching rule replaces its take-out characters with the rule's\r\r
+     * replacement and the scan continues after the inserted text; when no rule\r\r
+     * matches, the character is kept and the scan moves one step forward.\r\r
+     *\r\r
+     * @param word the word to transform\r\r
+     * @return the phonetic code, or null if no rule table has been built\r\r
+     */\r\r
+    public String transform(String word) {\r\r
+        if(ruleArray==null)\r\r
+            return null;\r\r
+        StringBuffer str=new StringBuffer(word.toUpperCase());\r\r
+        int strLength=str.length();\r\r
+        int startPos=0;\r\r
+\r\r
+        while(startPos<strLength){\r\r
+            int add=1;\r\r
+            for(int i=0;i<ruleArray.length;i++){\r\r
+                TransformationRule rule=(TransformationRule)ruleArray[i];\r\r
+                if(rule.startsWithExp() && startPos>0)\r\r
+                    continue;\r\r
+                // Skip rules needing more characters than are left. A rule\r\r
+                // that reaches exactly to the end of the word still fits.\r\r
+                if(startPos+rule.lengthOfMatch()>strLength)\r\r
+                    continue;\r\r
+                if(rule.isMatching(str,startPos)){\r\r
+                    String replaceExp=rule.getReplaceExp();\r\r
+                    str.replace(startPos,startPos+rule.getTakeOut(),replaceExp);\r\r
+                    add=replaceExp.length();\r\r
+                    strLength+=add-rule.getTakeOut();\r\r
+                    break;\r\r
+                }\r\r
+            }\r\r
+            startPos+=add;\r\r
+        }\r\r
+        return str.toString();\r\r
+    }\r\r
+\r\r
+    // Reads the phonetics file line by line and builds the transformation table.\r\r
+    private void buildRules(BufferedReader in)throws IOException{\r\r
+        String read=null;\r\r
+        LinkedList ruleList=new LinkedList();\r\r
+        while((read=in.readLine())!=null){\r\r
+            buildRule(realTrimmer(read),ruleList);\r\r
+        }\r\r
+        ruleArray=ruleList.toArray();\r\r
+    }\r\r
+\r\r
+    // Parses one trimmed line of the phonetics file into a rule and appends\r\r
+    // it to ruleList. Empty lines and lines starting with "version" produce\r\r
+    // no rule. Everything up to the first whitespace is the match expression;\r\r
+    // all non-whitespace characters after it form the replacement.\r\r
+    private void buildRule(String str, LinkedList ruleList){\r\r
+        if(str.length()<1 || str.startsWith("version"))\r\r
+            return;\r\r
+\r\r
+        StringBuffer matchExp=new StringBuffer();\r\r
+        StringBuffer replaceExp=new StringBuffer();\r\r
+        boolean start=false, end=false;\r\r
+        int takeOutPart=0, matchLength=0;\r\r
+        boolean match=true, inMulti=false;\r\r
+        for(int i=0;i<str.length();i++){\r\r
+            char ch=str.charAt(i);\r\r
+            if(Character.isWhitespace(ch)){\r\r
+                match=false;\r\r
+            }else if(!match){\r\r
+                replaceExp.append(ch);\r\r
+            }else if(!isReservedChar(ch)){\r\r
+                matchExp.append(ch);\r\r
+                // A group counts as one character, counted at its opening\r\r
+                // parenthesis; the characters inside it are not counted.\r\r
+                if(!inMulti){\r\r
+                    takeOutPart++;\r\r
+                    matchLength++;\r\r
+                }\r\r
+                if(ch==STARTMULTI || ch==ENDMULTI)\r\r
+                    inMulti=!inMulti;\r\r
+            }else if(ch=='-'){\r\r
+                // Each '-' means one character fewer is replaced.\r\r
+                takeOutPart--;\r\r
+            }else if(ch=='^'){\r\r
+                start=true;\r\r
+            }else if(ch=='$'){\r\r
+                end=true;\r\r
+            }\r\r
+        }\r\r
+        ruleList.add(new TransformationRule(matchExp.toString()\r\r
+                , replaceExp.toString(), takeOutPart, matchLength, start, end));\r\r
+    }\r\r
+\r\r
+    // Chars with special meaning to aspell. Not every one is implemented here.\r\r
+    private boolean isReservedChar(char ch){\r\r
+        return ch=='<' || ch=='>' || ch=='^' || ch=='$' || ch=='-'\r\r
+               || Character.isDigit(ch);\r\r
+    }\r\r
+\r\r
+    // Cuts off a trailing '#' comment and the surrounding whitespace.\r\r
+    private String realTrimmer(String row){\r\r
+        int pos=row.indexOf('#');\r\r
+        if(pos!=-1){\r\r
+            row=row.substring(0,pos);\r\r
+        }\r\r
+        return row.trim();\r\r
+    }\r\r
+\r\r
+    // Inner Classes\r\r
+    /*\r\r
+    * Holds the match string and the replace string and all the rule attributes.\r\r
+    * Is responsible for indicating matches. Static because it never touches\r\r
+    * the state of the enclosing transformator.\r\r
+    */\r\r
+    private static class TransformationRule{\r\r
+\r\r
+        private String replace;\r\r
+        private char[] match;\r\r
+        // takeOut=number of chars to replace;\r\r
+        // matchLength=length of matching string counting multies as one.\r\r
+        private int takeOut, matchLength;\r\r
+        private boolean start, end;\r\r
+\r\r
+        // Constructor\r\r
+        public TransformationRule(String match, String replace, int takeout\r\r
+                                  , int matchLength, boolean start, boolean end){\r\r
+            this.match=match.toCharArray();\r\r
+            this.replace=replace;\r\r
+            this.takeOut=takeout;\r\r
+            this.matchLength=matchLength;\r\r
+            this.start=start;\r\r
+            this.end=end;\r\r
+        }\r\r
+\r\r
+        /*\r\r
+        * Returns true if word from wordPos and forward matches the match string.\r\r
+        * A group in parentheses matches when any one of its characters equals\r\r
+        * the character at the current position, and then consumes that position.\r\r
+        * If the rule is anchored to the end, the match must also use up the\r\r
+        * rest of the word. Running past the end of the word is a mismatch.\r\r
+        */\r\r
+        public boolean isMatching(StringBuffer word, int wordPos){\r\r
+            boolean inMulti=false, multiMatch=false;\r\r
+\r\r
+            for(int matchPos=0;matchPos<match.length;matchPos++){\r\r
+                char matchCh=match[matchPos];\r\r
+                if(matchCh==STARTMULTI || matchCh==ENDMULTI){\r\r
+                    inMulti=!inMulti;\r\r
+                    if(inMulti){\r\r
+                        multiMatch=false;\r\r
+                    }else{\r\r
+                        // A failed group must end the match at once, so a\r\r
+                        // later hit cannot turn the result back to true.\r\r
+                        if(!multiMatch)\r\r
+                            return false;\r\r
+                        // The group as a whole consumed one character.\r\r
+                        wordPos++;\r\r
+                    }\r\r
+                }else{\r\r
+                    if(wordPos>=word.length())\r\r
+                        return false;\r\r
+                    boolean same=matchCh==word.charAt(wordPos);\r\r
+                    if(inMulti){\r\r
+                        multiMatch=multiMatch | same;\r\r
+                    }else{\r\r
+                        if(!same)\r\r
+                            return false;\r\r
+                        wordPos++;\r\r
+                    }\r\r
+                }\r\r
+            }\r\r
+            // wordPos is now the index just past the last matched character.\r\r
+            return !end || wordPos==word.length();\r\r
+        }\r\r
+\r\r
+        public String getReplaceExp(){\r\r
+            return  replace;\r\r
+        }\r\r
+\r\r
+        public int getTakeOut(){\r\r
+            return takeOut;\r\r
+        }\r\r
+\r\r
+        public boolean startsWithExp(){\r\r
+            return start;\r\r
+        }\r\r
+\r\r
+        public int lengthOfMatch(){\r\r
+            return matchLength;\r\r
+        }\r\r
+\r\r
+        // Just for debugging purposes.\r\r
+        public String toString(){\r\r
+            return "Match:"+String.valueOf(match)\r\r
+                   +" Replace:"+replace\r\r
+                   +" TakeOut:"+takeOut\r\r
+                   +" MatchLength:"+matchLength\r\r
+                   +" Start:"+start\r\r
+                   +" End:"+end;\r\r
+        }\r\r
+\r\r
+    }\r\r
+}\r\r