Initial revision
[old-projects.git] / ekit / com / swabunga / spell / event / DocumentWordTokenizer.java
CommitLineData
6dd70280
JL
1package com.swabunga.spell.event;\r\r
2\r\r
3import java.util.*;\r\r
4import java.text.*;\r\r
5import javax.swing.text.AttributeSet;\r\r
6import javax.swing.text.Document;\r\r
7import javax.swing.text.Element;\r\r
8import javax.swing.text.Segment;\r\r
9import javax.swing.text.BadLocationException;\r\r
10\r\r
11/** This class tokenizes a swing document model. It also allows for the\r\r
12 * document model to be changed when corrections occur.\r\r
13 *\r\r
14 * @author Jason Height (jheight@chariot.net.au)\r\r
15 */\r\r
16public class DocumentWordTokenizer implements WordTokenizer {\r\r
17 /** Holds the start character position of the current word*/\r\r
18 private int currentWordPos = 0;\r\r
19 /** Holds the end character position of the current word*/\r\r
20 private int currentWordEnd = 0;\r\r
21 /** Holds the start character position of the next word*/\r\r
22 private int nextWordPos = -1;\r\r
23 /** The actual text that is being tokenized*/\r\r
24 private Document document;\r\r
25 /** The character iterator over the document*/\r\r
26 private Segment text;\r\r
27 /** The cumulative word count that have been processed*/\r\r
28 private int wordCount = 0;\r\r
29 /** Flag indicating if there are any more tokens (words) left*/\r\r
30 private boolean moreTokens = true;\r\r
31 /** Is this a special case where the currentWordStart, currntWordEnd and\r\r
32 * nextWordPos have already been calculated. (see nextWord)\r\r
33 */\r\r
34 private boolean first = true;\r\r
35\r\r
36 private BreakIterator sentanceIterator;\r\r
37 private boolean startsSentance = true;\r\r
38\r\r
39\r\r
40 public DocumentWordTokenizer(Document document) {\r\r
41 this.document = document;\r\r
42 //Create a text segment over the etire document\r\r
43 text = new Segment();\r\r
44 sentanceIterator = BreakIterator.getSentenceInstance();\r\r
45 try {\r\r
46 document.getText(0, document.getLength(), text);\r\r
47 sentanceIterator.setText(text);\r\r
48 currentWordPos = getNextWordStart(text, 0);\r\r
49 //If the current word pos is -1 then the string was all white space\r\r
50 if (currentWordPos != -1) {\r\r
51 currentWordEnd = getNextWordEnd(text, currentWordPos);\r\r
52 nextWordPos = getNextWordStart(text, currentWordEnd);\r\r
53 } else {\r\r
54 moreTokens = false;\r\r
55 }\r\r
56 } catch (BadLocationException ex) {\r\r
57 moreTokens = false;\r\r
58 }\r\r
59 }\r\r
60\r\r
61 /** This helper method will return the start character of the next\r\r
62 * word in the buffer from the start position\r\r
63 */\r\r
64 private static int getNextWordStart(Segment text, int startPos) {\r\r
65 if (startPos <= text.getEndIndex())\r\r
66 for (char ch = text.setIndex(startPos);ch != Segment.DONE;ch = text.next()) {\r\r
67 if (Character.isLetterOrDigit(ch)) {\r\r
68 return text.getIndex();\r\r
69 }\r\r
70 }\r\r
71 return -1;\r\r
72 }\r\r
73\r\r
74 /** This helper method will return the end of the next word in the buffer.\r\r
75 *\r\r
76 */\r\r
77 private static int getNextWordEnd(Segment text, int startPos) {\r\r
78 for (char ch = text.setIndex(startPos); ch != Segment.DONE;ch = text.next()) {\r\r
79 if (!Character.isLetterOrDigit(ch)) {\r\r
80 return text.getIndex();\r\r
81 }\r\r
82 }\r\r
83 return text.getEndIndex();\r\r
84 }\r\r
85\r\r
86\r\r
87 /** Returns true if there are more words that can be processed in the string\r\r
88 *\r\r
89 */\r\r
90 public boolean hasMoreWords() {\r\r
91 return moreTokens;\r\r
92 }\r\r
93\r\r
94 /** Returns the current character position in the text\r\r
95 *\r\r
96 */\r\r
97 public int getCurrentWordPosition() {\r\r
98 return currentWordPos;\r\r
99 }\r\r
100\r\r
101 /** Returns the current end word position in the text\r\r
102 *\r\r
103 */\r\r
104 public int getCurrentWordEnd() {\r\r
105 return currentWordEnd;\r\r
106 }\r\r
107\r\r
108\r\r
109 /** Returns the next word in the text\r\r
110 *\r\r
111 */\r\r
112 public String nextWord() {\r\r
113 if (!first) {\r\r
114 currentWordPos = nextWordPos;\r\r
115 currentWordEnd = getNextWordEnd(text, currentWordPos);\r\r
116 nextWordPos = getNextWordStart(text, currentWordEnd+1);\r\r
117 int current = sentanceIterator.current();\r\r
118 if (current == currentWordPos)\r\r
119 startsSentance = true;\r\r
120 else {\r\r
121 startsSentance = false;\r\r
122 if (currentWordEnd > current)\r\r
123 sentanceIterator.next();\r\r
124 }\r\r
125\r\r
126 }\r\r
127 //The nextWordPos has already been populated\r\r
128 String word = null;\r\r
129 try {\r\r
130 word = document.getText(currentWordPos, currentWordEnd-currentWordPos);\r\r
131 } catch (BadLocationException ex) {\r\r
132 moreTokens = false;\r\r
133 }\r\r
134 wordCount++;\r\r
135 first = false;\r\r
136 if (nextWordPos == -1)\r\r
137 moreTokens = false;\r\r
138 return word;\r\r
139 }\r\r
140\r\r
141 /** Returns the current number of words that have been processed\r\r
142 *\r\r
143 */\r\r
144 public int getCurrentWordCount() {\r\r
145 return wordCount;\r\r
146 }\r\r
147\r\r
148 /** Replaces the current word token*/\r\r
149 public void replaceWord(String newWord) {\r\r
150 if (currentWordPos != -1) {\r\r
151 try {\r\r
152 /* ORIGINAL\r\r
153 document.remove(currentWordPos, currentWordEnd - currentWordPos);\r\r
154 document.insertString(currentWordPos, newWord, null);\r\r
155 */\r\r
156 // Howard's Version for Ekit\r\r
157 Element element = ((javax.swing.text.html.HTMLDocument)document).getCharacterElement(currentWordPos);\r\r
158 AttributeSet attribs = element.getAttributes();\r\r
159 document.remove(currentWordPos, currentWordEnd - currentWordPos);\r\r
160 document.insertString(currentWordPos, newWord, attribs);\r\r
161 // End Howard's Version\r\r
162 //Need to reset the segment\r\r
163 document.getText(0, document.getLength(), text);\r\r
164 } catch (BadLocationException ex) {\r\r
165 throw new RuntimeException(ex.getMessage());\r\r
166 }\r\r
167 //Position after the newly replaced word(s)\r\r
168 //Position after the newly replaced word(s)\r\r
169 first = true;\r\r
170 currentWordPos = getNextWordStart(text, currentWordPos+newWord.length());\r\r
171 if (currentWordPos != -1) {\r\r
172 currentWordEnd = getNextWordEnd(text, currentWordPos);\r\r
173 nextWordPos = getNextWordStart(text, currentWordEnd);\r\r
174 sentanceIterator.setText(text);\r\r
175 sentanceIterator.following(currentWordPos);\r\r
176 } else moreTokens = false;\r\r
177 }\r\r
178 }\r\r
179\r\r
180 /** Returns the current text that is being tokenized (includes any changes\r\r
181 * that have been made)\r\r
182 */\r\r
183 public String getContext() {\r\r
184 return text.toString();\r\r
185 }\r\r
186\r\r
187 /** Returns true iif the current word is at the start of a sentance*/\r\r
188 public boolean isNewSentance() {\r\r
189 return startsSentance;\r\r
190 }\r\r
191\r\r
192}