X-Git-Url: http://git.polytechnique.org/?a=blobdiff_plain;f=ekit%2Fcom%2Fswabunga%2Fspell%2Fengine%2FGenericTransformator.java;fp=ekit%2Fcom%2Fswabunga%2Fspell%2Fengine%2FGenericTransformator.java;h=c0abb66edbab59d9bd485941fbf7c8ff166f73dd;hb=6dd702802493556cff5e55eb4982d23e79a30832;hp=0000000000000000000000000000000000000000;hpb=c47fa64d719ab7f5062a87fbf5cc55a7916c5c53;p=old-projects.git diff --git a/ekit/com/swabunga/spell/engine/GenericTransformator.java b/ekit/com/swabunga/spell/engine/GenericTransformator.java new file mode 100644 index 0000000..c0abb66 --- /dev/null +++ b/ekit/com/swabunga/spell/engine/GenericTransformator.java @@ -0,0 +1,218 @@ +package com.swabunga.spell.engine; + +import java.io.*; +import java.util.*; + +/** A Generic implementation of a transformator takes an aspell + * phonetics file and constructs some sort of transformationtable using + * the inner class Rule. + * + * @author Robert Gustavsson (robert@lindesign.se) + */ +public class GenericTransformator implements Transformator{ + + public static final char STARTMULTI='('; + public static final char ENDMULTI=')'; + + Object[] ruleArray=null; + + public GenericTransformator(File phonetic)throws IOException{ + buildRules(new BufferedReader(new FileReader(phonetic))); + } + + /** + * Returns the phonetic code of the word. + */ + public String transform(String word) { + if(ruleArray==null) + return null; + TransformationRule rule; + StringBuffer str=new StringBuffer(word.toUpperCase()); + int strLength=str.length(); + int startPos=0, add=1; + + while(startPos0) + continue; + if(startPos+rule.lengthOfMatch()>=strLength) + continue; + if(rule.isMatching(str,startPos)){ + str.replace(startPos,startPos+rule.getTakeOut(),rule.getReplaceExp()); + add=rule.getReplaceExp().length(); + strLength-=rule.getTakeOut(); + strLength+=add; + //System.out.println("Replacing with rule#:"+i+" add="+add); + break; + } + } + startPos+=add; + } + return str.toString(); + } + + // Used to build up the transformastion table. + private void buildRules(BufferedReader in)throws IOException{ + String read=null; + LinkedList ruleList=new LinkedList(); + while((read=in.readLine())!=null){ + buildRule(realTrimmer(read),ruleList); + } + ruleArray=ruleList.toArray(); + } + + // Here is where the real work of reading the phonetics file is done. + private void buildRule(String str, LinkedList ruleList){ + if(str.length()<1) + return; + if(str.startsWith("version")) + return; + + TransformationRule rule=null; + StringBuffer matchExp=new StringBuffer(); + StringBuffer replaceExp=new StringBuffer(); + boolean start=false, end=false; + int takeOutPart=0, matchLength=0; + boolean match=true, inMulti=false; + for(int i=0;i' || ch=='^' || ch=='$' || ch=='-' || Character.isDigit(ch)) + return true; + return false; + } + + // Trims off everything we don't care about. + private String realTrimmer(String row){ + int pos=row.indexOf('#'); + if(pos!=-1){ + row=row.substring(0,pos); + } + return row.trim(); + } + + // Inner Classes + /* + * Holds the match string and the replace string and all the rule attributes. + * Is responsible for indicating matches. + */ + private class TransformationRule{ + + private String replace; + private char[] match; + // takeOut=number of chars to replace; + // matchLength=length of matching string counting multies as one. + private int takeOut, matchLength; + private boolean start, end; + + // Construktor + public TransformationRule(String match, String replace, int takeout + , int matchLength, boolean start, boolean end){ + this.match=match.toCharArray(); + this.replace=replace; + this.takeOut=takeout; + this.matchLength=matchLength; + this.start=start; + this.end=end; + } + + /* + * Returns true if word from pos and forward matches the match string. + * Precondition: wordPos+matchLength