--- /dev/null
+package com.swabunga.spell.event;\r\r
+\r\r
+import java.util.*;\r\r
+import java.text.*;\r\r
+import javax.swing.text.AttributeSet;\r\r
+import javax.swing.text.Document;\r\r
+import javax.swing.text.Element;\r\r
+import javax.swing.text.Segment;\r\r
+import javax.swing.text.BadLocationException;\r\r
+\r\r
+/** This class tokenizes a swing document model. It also allows for the\r\r
+ * document model to be changed when corrections occur.\r\r
+ *\r\r
+ * @author Jason Height (jheight@chariot.net.au)\r\r
+ */\r\r
+public class DocumentWordTokenizer implements WordTokenizer {\r\r
+ /** Holds the start character position of the current word*/\r\r
+ private int currentWordPos = 0;\r\r
+ /** Holds the end character position of the current word*/\r\r
+ private int currentWordEnd = 0;\r\r
+ /** Holds the start character position of the next word*/\r\r
+ private int nextWordPos = -1;\r\r
+ /** The actual text that is being tokenized*/\r\r
+ private Document document;\r\r
+ /** The character iterator over the document*/\r\r
+ private Segment text;\r\r
+ /** The cumulative word count that have been processed*/\r\r
+ private int wordCount = 0;\r\r
+ /** Flag indicating if there are any more tokens (words) left*/\r\r
+ private boolean moreTokens = true;\r\r
+ /** Is this a special case where the currentWordStart, currntWordEnd and\r\r
+ * nextWordPos have already been calculated. (see nextWord)\r\r
+ */\r\r
+ private boolean first = true;\r\r
+\r\r
+ private BreakIterator sentanceIterator;\r\r
+ private boolean startsSentance = true;\r\r
+\r\r
+\r\r
+ public DocumentWordTokenizer(Document document) {\r\r
+ this.document = document;\r\r
+ //Create a text segment over the etire document\r\r
+ text = new Segment();\r\r
+ sentanceIterator = BreakIterator.getSentenceInstance();\r\r
+ try {\r\r
+ document.getText(0, document.getLength(), text);\r\r
+ sentanceIterator.setText(text);\r\r
+ currentWordPos = getNextWordStart(text, 0);\r\r
+ //If the current word pos is -1 then the string was all white space\r\r
+ if (currentWordPos != -1) {\r\r
+ currentWordEnd = getNextWordEnd(text, currentWordPos);\r\r
+ nextWordPos = getNextWordStart(text, currentWordEnd);\r\r
+ } else {\r\r
+ moreTokens = false;\r\r
+ }\r\r
+ } catch (BadLocationException ex) {\r\r
+ moreTokens = false;\r\r
+ }\r\r
+ }\r\r
+\r\r
+ /** This helper method will return the start character of the next\r\r
+ * word in the buffer from the start position\r\r
+ */\r\r
+ private static int getNextWordStart(Segment text, int startPos) {\r\r
+ if (startPos <= text.getEndIndex())\r\r
+ for (char ch = text.setIndex(startPos);ch != Segment.DONE;ch = text.next()) {\r\r
+ if (Character.isLetterOrDigit(ch)) {\r\r
+ return text.getIndex();\r\r
+ }\r\r
+ }\r\r
+ return -1;\r\r
+ }\r\r
+\r\r
+ /** This helper method will return the end of the next word in the buffer.\r\r
+ *\r\r
+ */\r\r
+ private static int getNextWordEnd(Segment text, int startPos) {\r\r
+ for (char ch = text.setIndex(startPos); ch != Segment.DONE;ch = text.next()) {\r\r
+ if (!Character.isLetterOrDigit(ch)) {\r\r
+ return text.getIndex();\r\r
+ }\r\r
+ }\r\r
+ return text.getEndIndex();\r\r
+ }\r\r
+\r\r
+\r\r
+ /** Returns true if there are more words that can be processed in the string\r\r
+ *\r\r
+ */\r\r
+ public boolean hasMoreWords() {\r\r
+ return moreTokens;\r\r
+ }\r\r
+\r\r
+ /** Returns the current character position in the text\r\r
+ *\r\r
+ */\r\r
+ public int getCurrentWordPosition() {\r\r
+ return currentWordPos;\r\r
+ }\r\r
+\r\r
+ /** Returns the current end word position in the text\r\r
+ *\r\r
+ */\r\r
+ public int getCurrentWordEnd() {\r\r
+ return currentWordEnd;\r\r
+ }\r\r
+\r\r
+\r\r
+ /** Returns the next word in the text\r\r
+ *\r\r
+ */\r\r
+ public String nextWord() {\r\r
+ if (!first) {\r\r
+ currentWordPos = nextWordPos;\r\r
+ currentWordEnd = getNextWordEnd(text, currentWordPos);\r\r
+ nextWordPos = getNextWordStart(text, currentWordEnd+1);\r\r
+ int current = sentanceIterator.current();\r\r
+ if (current == currentWordPos)\r\r
+ startsSentance = true;\r\r
+ else {\r\r
+ startsSentance = false;\r\r
+ if (currentWordEnd > current)\r\r
+ sentanceIterator.next();\r\r
+ }\r\r
+\r\r
+ }\r\r
+ //The nextWordPos has already been populated\r\r
+ String word = null;\r\r
+ try {\r\r
+ word = document.getText(currentWordPos, currentWordEnd-currentWordPos);\r\r
+ } catch (BadLocationException ex) {\r\r
+ moreTokens = false;\r\r
+ }\r\r
+ wordCount++;\r\r
+ first = false;\r\r
+ if (nextWordPos == -1)\r\r
+ moreTokens = false;\r\r
+ return word;\r\r
+ }\r\r
+\r\r
+ /** Returns the current number of words that have been processed\r\r
+ *\r\r
+ */\r\r
+ public int getCurrentWordCount() {\r\r
+ return wordCount;\r\r
+ }\r\r
+\r\r
+ /** Replaces the current word token*/\r\r
+ public void replaceWord(String newWord) {\r\r
+ if (currentWordPos != -1) {\r\r
+ try {\r\r
+ /* ORIGINAL\r\r
+ document.remove(currentWordPos, currentWordEnd - currentWordPos);\r\r
+ document.insertString(currentWordPos, newWord, null);\r\r
+ */\r\r
+ // Howard's Version for Ekit\r\r
+ Element element = ((javax.swing.text.html.HTMLDocument)document).getCharacterElement(currentWordPos);\r\r
+ AttributeSet attribs = element.getAttributes();\r\r
+ document.remove(currentWordPos, currentWordEnd - currentWordPos);\r\r
+ document.insertString(currentWordPos, newWord, attribs);\r\r
+ // End Howard's Version\r\r
+ //Need to reset the segment\r\r
+ document.getText(0, document.getLength(), text);\r\r
+ } catch (BadLocationException ex) {\r\r
+ throw new RuntimeException(ex.getMessage());\r\r
+ }\r\r
+ //Position after the newly replaced word(s)\r\r
+ //Position after the newly replaced word(s)\r\r
+ first = true;\r\r
+ currentWordPos = getNextWordStart(text, currentWordPos+newWord.length());\r\r
+ if (currentWordPos != -1) {\r\r
+ currentWordEnd = getNextWordEnd(text, currentWordPos);\r\r
+ nextWordPos = getNextWordStart(text, currentWordEnd);\r\r
+ sentanceIterator.setText(text);\r\r
+ sentanceIterator.following(currentWordPos);\r\r
+ } else moreTokens = false;\r\r
+ }\r\r
+ }\r\r
+\r\r
+ /** Returns the current text that is being tokenized (includes any changes\r\r
+ * that have been made)\r\r
+ */\r\r
+ public String getContext() {\r\r
+ return text.toString();\r\r
+ }\r\r
+\r\r
+ /** Returns true iif the current word is at the start of a sentance*/\r\r
+ public boolean isNewSentance() {\r\r
+ return startsSentance;\r\r
+ }\r\r
+\r\r
+}
\ No newline at end of file