Commit | Line | Data |
---|---|---|
6dd70280 JL |
1 | package com.swabunga.spell.event;\r\r |
2 | \r\r | |
3 | import java.util.*;\r\r | |
4 | import java.text.*;\r\r | |
5 | import javax.swing.text.AttributeSet;\r\r | |
6 | import javax.swing.text.Document;\r\r | |
7 | import javax.swing.text.Element;\r\r | |
8 | import javax.swing.text.Segment;\r\r | |
9 | import javax.swing.text.BadLocationException;\r\r | |
10 | \r\r | |
11 | /** This class tokenizes a swing document model. It also allows for the\r\r | |
12 | * document model to be changed when corrections occur.\r\r | |
13 | *\r\r | |
14 | * @author Jason Height (jheight@chariot.net.au)\r\r | |
15 | */\r\r | |
16 | public class DocumentWordTokenizer implements WordTokenizer {\r\r | |
17 | /** Holds the start character position of the current word*/\r\r | |
18 | private int currentWordPos = 0;\r\r | |
19 | /** Holds the end character position of the current word*/\r\r | |
20 | private int currentWordEnd = 0;\r\r | |
21 | /** Holds the start character position of the next word*/\r\r | |
22 | private int nextWordPos = -1;\r\r | |
23 | /** The actual text that is being tokenized*/\r\r | |
24 | private Document document;\r\r | |
25 | /** The character iterator over the document*/\r\r | |
26 | private Segment text;\r\r | |
27 | /** The cumulative word count that have been processed*/\r\r | |
28 | private int wordCount = 0;\r\r | |
29 | /** Flag indicating if there are any more tokens (words) left*/\r\r | |
30 | private boolean moreTokens = true;\r\r | |
31 | /** Is this a special case where the currentWordStart, currntWordEnd and\r\r | |
32 | * nextWordPos have already been calculated. (see nextWord)\r\r | |
33 | */\r\r | |
34 | private boolean first = true;\r\r | |
35 | \r\r | |
36 | private BreakIterator sentanceIterator;\r\r | |
37 | private boolean startsSentance = true;\r\r | |
38 | \r\r | |
39 | \r\r | |
40 | public DocumentWordTokenizer(Document document) {\r\r | |
41 | this.document = document;\r\r | |
42 | //Create a text segment over the etire document\r\r | |
43 | text = new Segment();\r\r | |
44 | sentanceIterator = BreakIterator.getSentenceInstance();\r\r | |
45 | try {\r\r | |
46 | document.getText(0, document.getLength(), text);\r\r | |
47 | sentanceIterator.setText(text);\r\r | |
48 | currentWordPos = getNextWordStart(text, 0);\r\r | |
49 | //If the current word pos is -1 then the string was all white space\r\r | |
50 | if (currentWordPos != -1) {\r\r | |
51 | currentWordEnd = getNextWordEnd(text, currentWordPos);\r\r | |
52 | nextWordPos = getNextWordStart(text, currentWordEnd);\r\r | |
53 | } else {\r\r | |
54 | moreTokens = false;\r\r | |
55 | }\r\r | |
56 | } catch (BadLocationException ex) {\r\r | |
57 | moreTokens = false;\r\r | |
58 | }\r\r | |
59 | }\r\r | |
60 | \r\r | |
61 | /** This helper method will return the start character of the next\r\r | |
62 | * word in the buffer from the start position\r\r | |
63 | */\r\r | |
64 | private static int getNextWordStart(Segment text, int startPos) {\r\r | |
65 | if (startPos <= text.getEndIndex())\r\r | |
66 | for (char ch = text.setIndex(startPos);ch != Segment.DONE;ch = text.next()) {\r\r | |
67 | if (Character.isLetterOrDigit(ch)) {\r\r | |
68 | return text.getIndex();\r\r | |
69 | }\r\r | |
70 | }\r\r | |
71 | return -1;\r\r | |
72 | }\r\r | |
73 | \r\r | |
74 | /** This helper method will return the end of the next word in the buffer.\r\r | |
75 | *\r\r | |
76 | */\r\r | |
77 | private static int getNextWordEnd(Segment text, int startPos) {\r\r | |
78 | for (char ch = text.setIndex(startPos); ch != Segment.DONE;ch = text.next()) {\r\r | |
79 | if (!Character.isLetterOrDigit(ch)) {\r\r | |
80 | return text.getIndex();\r\r | |
81 | }\r\r | |
82 | }\r\r | |
83 | return text.getEndIndex();\r\r | |
84 | }\r\r | |
85 | \r\r | |
86 | \r\r | |
87 | /** Returns true if there are more words that can be processed in the string\r\r | |
88 | *\r\r | |
89 | */\r\r | |
90 | public boolean hasMoreWords() {\r\r | |
91 | return moreTokens;\r\r | |
92 | }\r\r | |
93 | \r\r | |
94 | /** Returns the current character position in the text\r\r | |
95 | *\r\r | |
96 | */\r\r | |
97 | public int getCurrentWordPosition() {\r\r | |
98 | return currentWordPos;\r\r | |
99 | }\r\r | |
100 | \r\r | |
101 | /** Returns the current end word position in the text\r\r | |
102 | *\r\r | |
103 | */\r\r | |
104 | public int getCurrentWordEnd() {\r\r | |
105 | return currentWordEnd;\r\r | |
106 | }\r\r | |
107 | \r\r | |
108 | \r\r | |
109 | /** Returns the next word in the text\r\r | |
110 | *\r\r | |
111 | */\r\r | |
112 | public String nextWord() {\r\r | |
113 | if (!first) {\r\r | |
114 | currentWordPos = nextWordPos;\r\r | |
115 | currentWordEnd = getNextWordEnd(text, currentWordPos);\r\r | |
116 | nextWordPos = getNextWordStart(text, currentWordEnd+1);\r\r | |
117 | int current = sentanceIterator.current();\r\r | |
118 | if (current == currentWordPos)\r\r | |
119 | startsSentance = true;\r\r | |
120 | else {\r\r | |
121 | startsSentance = false;\r\r | |
122 | if (currentWordEnd > current)\r\r | |
123 | sentanceIterator.next();\r\r | |
124 | }\r\r | |
125 | \r\r | |
126 | }\r\r | |
127 | //The nextWordPos has already been populated\r\r | |
128 | String word = null;\r\r | |
129 | try {\r\r | |
130 | word = document.getText(currentWordPos, currentWordEnd-currentWordPos);\r\r | |
131 | } catch (BadLocationException ex) {\r\r | |
132 | moreTokens = false;\r\r | |
133 | }\r\r | |
134 | wordCount++;\r\r | |
135 | first = false;\r\r | |
136 | if (nextWordPos == -1)\r\r | |
137 | moreTokens = false;\r\r | |
138 | return word;\r\r | |
139 | }\r\r | |
140 | \r\r | |
141 | /** Returns the current number of words that have been processed\r\r | |
142 | *\r\r | |
143 | */\r\r | |
144 | public int getCurrentWordCount() {\r\r | |
145 | return wordCount;\r\r | |
146 | }\r\r | |
147 | \r\r | |
148 | /** Replaces the current word token*/\r\r | |
149 | public void replaceWord(String newWord) {\r\r | |
150 | if (currentWordPos != -1) {\r\r | |
151 | try {\r\r | |
152 | /* ORIGINAL\r\r | |
153 | document.remove(currentWordPos, currentWordEnd - currentWordPos);\r\r | |
154 | document.insertString(currentWordPos, newWord, null);\r\r | |
155 | */\r\r | |
156 | // Howard's Version for Ekit\r\r | |
157 | Element element = ((javax.swing.text.html.HTMLDocument)document).getCharacterElement(currentWordPos);\r\r | |
158 | AttributeSet attribs = element.getAttributes();\r\r | |
159 | document.remove(currentWordPos, currentWordEnd - currentWordPos);\r\r | |
160 | document.insertString(currentWordPos, newWord, attribs);\r\r | |
161 | // End Howard's Version\r\r | |
162 | //Need to reset the segment\r\r | |
163 | document.getText(0, document.getLength(), text);\r\r | |
164 | } catch (BadLocationException ex) {\r\r | |
165 | throw new RuntimeException(ex.getMessage());\r\r | |
166 | }\r\r | |
167 | //Position after the newly replaced word(s)\r\r | |
168 | //Position after the newly replaced word(s)\r\r | |
169 | first = true;\r\r | |
170 | currentWordPos = getNextWordStart(text, currentWordPos+newWord.length());\r\r | |
171 | if (currentWordPos != -1) {\r\r | |
172 | currentWordEnd = getNextWordEnd(text, currentWordPos);\r\r | |
173 | nextWordPos = getNextWordStart(text, currentWordEnd);\r\r | |
174 | sentanceIterator.setText(text);\r\r | |
175 | sentanceIterator.following(currentWordPos);\r\r | |
176 | } else moreTokens = false;\r\r | |
177 | }\r\r | |
178 | }\r\r | |
179 | \r\r | |
180 | /** Returns the current text that is being tokenized (includes any changes\r\r | |
181 | * that have been made)\r\r | |
182 | */\r\r | |
183 | public String getContext() {\r\r | |
184 | return text.toString();\r\r | |
185 | }\r\r | |
186 | \r\r | |
187 | /** Returns true iif the current word is at the start of a sentance*/\r\r | |
188 | public boolean isNewSentance() {\r\r | |
189 | return startsSentance;\r\r | |
190 | }\r\r | |
191 | \r\r | |
192 | } |