--- /dev/null
+package com.swabunga.spell.event;\r\r
+\r\r
+import java.util.*;\r\r
+import java.text.*;\r\r
+\r\r
+/** This class tokenizes a input string.\r\r
+ * <p>\r\r
+ * It also allows for the string to be mutated. The result after the spell\r\r
+ * checking is completed is available to the call to getFinalText</p>\r\r
+ *\r\r
+ * @author Jason Height (jheight@chariot.net.au)\r\r
+ */\r\r
+public class StringWordTokenizer implements WordTokenizer {\r\r
+ /** Holds the start character position of the current word*/\r\r
+ private int currentWordPos = 0;\r\r
+ /** Holds the end character position of the current word*/\r\r
+ private int currentWordEnd = 0;\r\r
+ /** Holds the start character position of the next word*/\r\r
+ private int nextWordPos = -1;\r\r
+ /** The actual text that is being tokenized*/\r\r
+ private StringBuffer text;\r\r
+ /** The cumulative word count that have been processed*/\r\r
+ private int wordCount = 0;\r\r
+ /** Flag indicating if there are any more tokens (words) left*/\r\r
+ private boolean moreTokens = true;\r\r
+ /** Is this a special case where the currentWordStart, currntWordEnd and\r\r
+ * nextWordPos have already been calculated. (see nextWord)\r\r
+ */\r\r
+ private boolean first = true;\r\r
+\r\r
+ private BreakIterator sentanceIterator;\r\r
+ private boolean startsSentance = true;\r\r
+\r\r
+\r\r
+ public StringWordTokenizer(String text) {\r\r
+ sentanceIterator = BreakIterator.getSentenceInstance();\r\r
+ sentanceIterator.setText(text);\r\r
+ sentanceIterator.first();\r\r
+ //Wrap a string buffer to hopefully make things a bit easier and efficient to\r\r
+ //replace words etc.\r\r
+ this.text = new StringBuffer(text);\r\r
+ currentWordPos = getNextWordStart(this.text, 0);\r\r
+ //If the current word pos is -1 then the string was all white space\r\r
+ if (currentWordPos != -1) {\r\r
+ currentWordEnd = getNextWordEnd(this.text, currentWordPos);\r\r
+ nextWordPos = getNextWordStart(this.text, currentWordEnd);\r\r
+ } else {\r\r
+ moreTokens = false;\r\r
+ }\r\r
+ }\r\r
+\r\r
+ /** This helper method will return the start character of the next\r\r
+ * word in the buffer from the start position\r\r
+ */\r\r
+ private static int getNextWordStart(StringBuffer text, int startPos) {\r\r
+ int size = text.length();\r\r
+ for (int i=startPos;i<size;i++) {\r\r
+ if (Character.isLetterOrDigit(text.charAt(i))) {\r\r
+ return i;\r\r
+ }\r\r
+ }\r\r
+ return -1;\r\r
+ }\r\r
+\r\r
+ /** This helper method will return the end of the next word in the buffer.\r\r
+ *\r\r
+ */\r\r
+ private static int getNextWordEnd(StringBuffer text, int startPos) {\r\r
+ int size = text.length();\r\r
+ for (int i=startPos;i<size;i++) {\r\r
+ if (!Character.isLetterOrDigit(text.charAt(i))) {\r\r
+ return i;\r\r
+ }\r\r
+ }\r\r
+ return size;\r\r
+ }\r\r
+\r\r
+\r\r
+ /** Returns true if there are more words that can be processed in the string\r\r
+ *\r\r
+ */\r\r
+ public boolean hasMoreWords() {\r\r
+ return moreTokens;\r\r
+ }\r\r
+\r\r
+ /** Returns the current character position in the text\r\r
+ *\r\r
+ */\r\r
+ public int getCurrentWordPosition() {\r\r
+ return currentWordPos;\r\r
+ }\r\r
+\r\r
+ /** Returns the current end word position in the text\r\r
+ *\r\r
+ */\r\r
+ public int getCurrentWordEnd() {\r\r
+ return currentWordEnd;\r\r
+ }\r\r
+\r\r
+ /** Returns the next word in the text\r\r
+ *\r\r
+ */\r\r
+ public String nextWord() {\r\r
+ if (!first) {\r\r
+ currentWordPos = nextWordPos;\r\r
+ currentWordEnd = getNextWordEnd(text, currentWordPos);\r\r
+ nextWordPos = getNextWordStart(text, currentWordEnd+1);\r\r
+ int current = sentanceIterator.current();\r\r
+ if (current == currentWordPos)\r\r
+ startsSentance = true;\r\r
+ else {\r\r
+ startsSentance = false;\r\r
+ if (currentWordEnd > current)\r\r
+ sentanceIterator.next();\r\r
+ }\r\r
+ }\r\r
+ //The nextWordPos has already been populated\r\r
+ String word = text.substring(currentWordPos, currentWordEnd);\r\r
+ wordCount++;\r\r
+ first = false;\r\r
+ if (nextWordPos == -1)\r\r
+ moreTokens = false;\r\r
+ return word;\r\r
+ }\r\r
+\r\r
+ /** Returns the current number of words that have been processed\r\r
+ *\r\r
+ */\r\r
+ public int getCurrentWordCount() {\r\r
+ return wordCount;\r\r
+ }\r\r
+\r\r
+ /** Replaces the current word token*/\r\r
+ public void replaceWord(String newWord) {\r\r
+ if (currentWordPos != -1) {\r\r
+ text.replace(currentWordPos, currentWordEnd, newWord);\r\r
+ //Position after the newly replaced word(s)\r\r
+ first = true;\r\r
+ currentWordPos = getNextWordStart(text, currentWordPos+newWord.length());\r\r
+ if (currentWordPos != -1) {\r\r
+ currentWordEnd = getNextWordEnd(text, currentWordPos);\r\r
+ nextWordPos = getNextWordStart(text, currentWordEnd);\r\r
+ sentanceIterator.setText(text.toString());\r\r
+ sentanceIterator.following(currentWordPos);\r\r
+ } else moreTokens = false;\r\r
+ }\r\r
+ }\r\r
+\r\r
+ /** returns true iif the current word is at the start of a sentance*/\r\r
+ public boolean isNewSentance() {\r\r
+ return startsSentance;\r\r
+ }\r\r
+\r\r
+ /** Returns the current text that is being tokenized (includes any changes\r\r
+ * that have been made)\r\r
+ */\r\r
+ public String getContext() {\r\r
+ return text.toString();\r\r
+ }\r\r
+\r\r
+ /** This method can be used to return the final text after the schecking is complete.*/\r\r
+ public String getFinalText() {\r\r
+ return getContext();\r\r
+ }\r\r
+\r\r
+\r\r
+ public static void main(String args[]) {\r\r
+ StringWordTokenizer t = new StringWordTokenizer(" This is a test problem");\r\r
+ while(t.hasMoreWords()) {\r\r
+ String word = t.nextWord();\r\r
+ System.out.println("Word is '"+word+"'");\r\r
+ if ("test".equals(word)) t.replaceWord("mightly big");\r\r
+ }\r\r
+ System.out.println("End text is: '"+t.getFinalText()+"'");\r\r
+\r\r
+ t = new StringWordTokenizer(" README ");\r\r
+ while(t.hasMoreWords()) {\r\r
+ String word = t.nextWord();\r\r
+ System.out.println("Word is '"+word+"'");\r\r
+ }\r\r
+ System.out.println("End text is: '"+t.getFinalText()+"'");\r\r
+\r\r
+ t = new StringWordTokenizer("This is a acronym (A.C.M.E). This is the second sentance.");\r\r
+ while(t.hasMoreWords()) {\r\r
+ String word = t.nextWord();\r\r
+ System.out.println("Word is '"+word+"'. Starts Sentance?="+t.isNewSentance());\r\r
+ if (word.equals("acronym"))\r\r
+ t.replaceWord("test");\r\r
+ }\r\r
+ }\r\r
+}
\ No newline at end of file