| 1 | package com.swabunga.spell.event;\r\r |
| 2 | \r\r |
| 3 | import java.util.*;\r\r |
| 4 | import java.text.*;\r\r |
| 5 | import javax.swing.text.AttributeSet;\r\r |
| 6 | import javax.swing.text.Document;\r\r |
| 7 | import javax.swing.text.Element;\r\r |
| 8 | import javax.swing.text.Segment;\r\r |
| 9 | import javax.swing.text.BadLocationException;\r\r |
| 10 | \r\r |
| 11 | /** This class tokenizes a swing document model. It also allows for the\r\r |
| 12 | * document model to be changed when corrections occur.\r\r |
| 13 | *\r\r |
| 14 | * @author Jason Height (jheight@chariot.net.au)\r\r |
| 15 | */\r\r |
| 16 | public class DocumentWordTokenizer implements WordTokenizer {\r\r |
| 17 | /** Holds the start character position of the current word*/\r\r |
| 18 | private int currentWordPos = 0;\r\r |
| 19 | /** Holds the end character position of the current word*/\r\r |
| 20 | private int currentWordEnd = 0;\r\r |
| 21 | /** Holds the start character position of the next word*/\r\r |
| 22 | private int nextWordPos = -1;\r\r |
| 23 | /** The actual text that is being tokenized*/\r\r |
| 24 | private Document document;\r\r |
| 25 | /** The character iterator over the document*/\r\r |
| 26 | private Segment text;\r\r |
| 27 | /** The cumulative word count that have been processed*/\r\r |
| 28 | private int wordCount = 0;\r\r |
| 29 | /** Flag indicating if there are any more tokens (words) left*/\r\r |
| 30 | private boolean moreTokens = true;\r\r |
| 31 | /** Is this a special case where the currentWordStart, currntWordEnd and\r\r |
| 32 | * nextWordPos have already been calculated. (see nextWord)\r\r |
| 33 | */\r\r |
| 34 | private boolean first = true;\r\r |
| 35 | \r\r |
| 36 | private BreakIterator sentanceIterator;\r\r |
| 37 | private boolean startsSentance = true;\r\r |
| 38 | \r\r |
| 39 | \r\r |
| 40 | public DocumentWordTokenizer(Document document) {\r\r |
| 41 | this.document = document;\r\r |
| 42 | //Create a text segment over the etire document\r\r |
| 43 | text = new Segment();\r\r |
| 44 | sentanceIterator = BreakIterator.getSentenceInstance();\r\r |
| 45 | try {\r\r |
| 46 | document.getText(0, document.getLength(), text);\r\r |
| 47 | sentanceIterator.setText(text);\r\r |
| 48 | currentWordPos = getNextWordStart(text, 0);\r\r |
| 49 | //If the current word pos is -1 then the string was all white space\r\r |
| 50 | if (currentWordPos != -1) {\r\r |
| 51 | currentWordEnd = getNextWordEnd(text, currentWordPos);\r\r |
| 52 | nextWordPos = getNextWordStart(text, currentWordEnd);\r\r |
| 53 | } else {\r\r |
| 54 | moreTokens = false;\r\r |
| 55 | }\r\r |
| 56 | } catch (BadLocationException ex) {\r\r |
| 57 | moreTokens = false;\r\r |
| 58 | }\r\r |
| 59 | }\r\r |
| 60 | \r\r |
| 61 | /** This helper method will return the start character of the next\r\r |
| 62 | * word in the buffer from the start position\r\r |
| 63 | */\r\r |
| 64 | private static int getNextWordStart(Segment text, int startPos) {\r\r |
| 65 | if (startPos <= text.getEndIndex())\r\r |
| 66 | for (char ch = text.setIndex(startPos);ch != Segment.DONE;ch = text.next()) {\r\r |
| 67 | if (Character.isLetterOrDigit(ch)) {\r\r |
| 68 | return text.getIndex();\r\r |
| 69 | }\r\r |
| 70 | }\r\r |
| 71 | return -1;\r\r |
| 72 | }\r\r |
| 73 | \r\r |
| 74 | /** This helper method will return the end of the next word in the buffer.\r\r |
| 75 | *\r\r |
| 76 | */\r\r |
| 77 | private static int getNextWordEnd(Segment text, int startPos) {\r\r |
| 78 | for (char ch = text.setIndex(startPos); ch != Segment.DONE;ch = text.next()) {\r\r |
| 79 | if (!Character.isLetterOrDigit(ch)) {\r\r |
| 80 | return text.getIndex();\r\r |
| 81 | }\r\r |
| 82 | }\r\r |
| 83 | return text.getEndIndex();\r\r |
| 84 | }\r\r |
| 85 | \r\r |
| 86 | \r\r |
| 87 | /** Returns true if there are more words that can be processed in the string\r\r |
| 88 | *\r\r |
| 89 | */\r\r |
| 90 | public boolean hasMoreWords() {\r\r |
| 91 | return moreTokens;\r\r |
| 92 | }\r\r |
| 93 | \r\r |
| 94 | /** Returns the current character position in the text\r\r |
| 95 | *\r\r |
| 96 | */\r\r |
| 97 | public int getCurrentWordPosition() {\r\r |
| 98 | return currentWordPos;\r\r |
| 99 | }\r\r |
| 100 | \r\r |
| 101 | /** Returns the current end word position in the text\r\r |
| 102 | *\r\r |
| 103 | */\r\r |
| 104 | public int getCurrentWordEnd() {\r\r |
| 105 | return currentWordEnd;\r\r |
| 106 | }\r\r |
| 107 | \r\r |
| 108 | \r\r |
| 109 | /** Returns the next word in the text\r\r |
| 110 | *\r\r |
| 111 | */\r\r |
| 112 | public String nextWord() {\r\r |
| 113 | if (!first) {\r\r |
| 114 | currentWordPos = nextWordPos;\r\r |
| 115 | currentWordEnd = getNextWordEnd(text, currentWordPos);\r\r |
| 116 | nextWordPos = getNextWordStart(text, currentWordEnd+1);\r\r |
| 117 | int current = sentanceIterator.current();\r\r |
| 118 | if (current == currentWordPos)\r\r |
| 119 | startsSentance = true;\r\r |
| 120 | else {\r\r |
| 121 | startsSentance = false;\r\r |
| 122 | if (currentWordEnd > current)\r\r |
| 123 | sentanceIterator.next();\r\r |
| 124 | }\r\r |
| 125 | \r\r |
| 126 | }\r\r |
| 127 | //The nextWordPos has already been populated\r\r |
| 128 | String word = null;\r\r |
| 129 | try {\r\r |
| 130 | word = document.getText(currentWordPos, currentWordEnd-currentWordPos);\r\r |
| 131 | } catch (BadLocationException ex) {\r\r |
| 132 | moreTokens = false;\r\r |
| 133 | }\r\r |
| 134 | wordCount++;\r\r |
| 135 | first = false;\r\r |
| 136 | if (nextWordPos == -1)\r\r |
| 137 | moreTokens = false;\r\r |
| 138 | return word;\r\r |
| 139 | }\r\r |
| 140 | \r\r |
| 141 | /** Returns the current number of words that have been processed\r\r |
| 142 | *\r\r |
| 143 | */\r\r |
| 144 | public int getCurrentWordCount() {\r\r |
| 145 | return wordCount;\r\r |
| 146 | }\r\r |
| 147 | \r\r |
| 148 | /** Replaces the current word token*/\r\r |
| 149 | public void replaceWord(String newWord) {\r\r |
| 150 | if (currentWordPos != -1) {\r\r |
| 151 | try {\r\r |
| 152 | /* ORIGINAL\r\r |
| 153 | document.remove(currentWordPos, currentWordEnd - currentWordPos);\r\r |
| 154 | document.insertString(currentWordPos, newWord, null);\r\r |
| 155 | */\r\r |
| 156 | // Howard's Version for Ekit\r\r |
| 157 | Element element = ((javax.swing.text.html.HTMLDocument)document).getCharacterElement(currentWordPos);\r\r |
| 158 | AttributeSet attribs = element.getAttributes();\r\r |
| 159 | document.remove(currentWordPos, currentWordEnd - currentWordPos);\r\r |
| 160 | document.insertString(currentWordPos, newWord, attribs);\r\r |
| 161 | // End Howard's Version\r\r |
| 162 | //Need to reset the segment\r\r |
| 163 | document.getText(0, document.getLength(), text);\r\r |
| 164 | } catch (BadLocationException ex) {\r\r |
| 165 | throw new RuntimeException(ex.getMessage());\r\r |
| 166 | }\r\r |
| 167 | //Position after the newly replaced word(s)\r\r |
| 168 | //Position after the newly replaced word(s)\r\r |
| 169 | first = true;\r\r |
| 170 | currentWordPos = getNextWordStart(text, currentWordPos+newWord.length());\r\r |
| 171 | if (currentWordPos != -1) {\r\r |
| 172 | currentWordEnd = getNextWordEnd(text, currentWordPos);\r\r |
| 173 | nextWordPos = getNextWordStart(text, currentWordEnd);\r\r |
| 174 | sentanceIterator.setText(text);\r\r |
| 175 | sentanceIterator.following(currentWordPos);\r\r |
| 176 | } else moreTokens = false;\r\r |
| 177 | }\r\r |
| 178 | }\r\r |
| 179 | \r\r |
| 180 | /** Returns the current text that is being tokenized (includes any changes\r\r |
| 181 | * that have been made)\r\r |
| 182 | */\r\r |
| 183 | public String getContext() {\r\r |
| 184 | return text.toString();\r\r |
| 185 | }\r\r |
| 186 | \r\r |
| 187 | /** Returns true iif the current word is at the start of a sentance*/\r\r |
| 188 | public boolean isNewSentance() {\r\r |
| 189 | return startsSentance;\r\r |
| 190 | }\r\r |
| 191 | \r\r |
| 192 | } |