Initial revision
[old-projects.git] / ekit / com / swabunga / spell / event / DocumentWordTokenizer.java
1 package com.swabunga.spell.event;
2
3 import java.util.*;
4 import java.text.*;
5 import javax.swing.text.AttributeSet;
6 import javax.swing.text.Document;
7 import javax.swing.text.Element;
8 import javax.swing.text.Segment;
9 import javax.swing.text.BadLocationException;
10
11 /** This class tokenizes a swing document model. It also allows for the
12 * document model to be changed when corrections occur.
13 *
14 * @author Jason Height (jheight@chariot.net.au)
15 */
16 public class DocumentWordTokenizer implements WordTokenizer {
17 /** Holds the start character position of the current word*/
18 private int currentWordPos = 0;
19 /** Holds the end character position of the current word*/
20 private int currentWordEnd = 0;
21 /** Holds the start character position of the next word*/
22 private int nextWordPos = -1;
23 /** The actual text that is being tokenized*/
24 private Document document;
25 /** The character iterator over the document*/
26 private Segment text;
27 /** The cumulative word count that have been processed*/
28 private int wordCount = 0;
29 /** Flag indicating if there are any more tokens (words) left*/
30 private boolean moreTokens = true;
31 /** Is this a special case where the currentWordStart, currntWordEnd and
32 * nextWordPos have already been calculated. (see nextWord)
33 */
34 private boolean first = true;
35
36 private BreakIterator sentanceIterator;
37 private boolean startsSentance = true;
38
39
40 public DocumentWordTokenizer(Document document) {
41 this.document = document;
42 //Create a text segment over the etire document
43 text = new Segment();
44 sentanceIterator = BreakIterator.getSentenceInstance();
45 try {
46 document.getText(0, document.getLength(), text);
47 sentanceIterator.setText(text);
48 currentWordPos = getNextWordStart(text, 0);
49 //If the current word pos is -1 then the string was all white space
50 if (currentWordPos != -1) {
51 currentWordEnd = getNextWordEnd(text, currentWordPos);
52 nextWordPos = getNextWordStart(text, currentWordEnd);
53 } else {
54 moreTokens = false;
55 }
56 } catch (BadLocationException ex) {
57 moreTokens = false;
58 }
59 }
60
61 /** This helper method will return the start character of the next
62 * word in the buffer from the start position
63 */
64 private static int getNextWordStart(Segment text, int startPos) {
65 if (startPos <= text.getEndIndex())
66 for (char ch = text.setIndex(startPos);ch != Segment.DONE;ch = text.next()) {
67 if (Character.isLetterOrDigit(ch)) {
68 return text.getIndex();
69 }
70 }
71 return -1;
72 }
73
74 /** This helper method will return the end of the next word in the buffer.
75 *
76 */
77 private static int getNextWordEnd(Segment text, int startPos) {
78 for (char ch = text.setIndex(startPos); ch != Segment.DONE;ch = text.next()) {
79 if (!Character.isLetterOrDigit(ch)) {
80 return text.getIndex();
81 }
82 }
83 return text.getEndIndex();
84 }
85
86
87 /** Returns true if there are more words that can be processed in the string
88 *
89 */
90 public boolean hasMoreWords() {
91 return moreTokens;
92 }
93
94 /** Returns the current character position in the text
95 *
96 */
97 public int getCurrentWordPosition() {
98 return currentWordPos;
99 }
100
101 /** Returns the current end word position in the text
102 *
103 */
104 public int getCurrentWordEnd() {
105 return currentWordEnd;
106 }
107
108
109 /** Returns the next word in the text
110 *
111 */
112 public String nextWord() {
113 if (!first) {
114 currentWordPos = nextWordPos;
115 currentWordEnd = getNextWordEnd(text, currentWordPos);
116 nextWordPos = getNextWordStart(text, currentWordEnd+1);
117 int current = sentanceIterator.current();
118 if (current == currentWordPos)
119 startsSentance = true;
120 else {
121 startsSentance = false;
122 if (currentWordEnd > current)
123 sentanceIterator.next();
124 }
125
126 }
127 //The nextWordPos has already been populated
128 String word = null;
129 try {
130 word = document.getText(currentWordPos, currentWordEnd-currentWordPos);
131 } catch (BadLocationException ex) {
132 moreTokens = false;
133 }
134 wordCount++;
135 first = false;
136 if (nextWordPos == -1)
137 moreTokens = false;
138 return word;
139 }
140
141 /** Returns the current number of words that have been processed
142 *
143 */
144 public int getCurrentWordCount() {
145 return wordCount;
146 }
147
148 /** Replaces the current word token*/
149 public void replaceWord(String newWord) {
150 if (currentWordPos != -1) {
151 try {
152 /* ORIGINAL
153 document.remove(currentWordPos, currentWordEnd - currentWordPos);
154 document.insertString(currentWordPos, newWord, null);
155 */
156 // Howard's Version for Ekit
157 Element element = ((javax.swing.text.html.HTMLDocument)document).getCharacterElement(currentWordPos);
158 AttributeSet attribs = element.getAttributes();
159 document.remove(currentWordPos, currentWordEnd - currentWordPos);
160 document.insertString(currentWordPos, newWord, attribs);
161 // End Howard's Version
162 //Need to reset the segment
163 document.getText(0, document.getLength(), text);
164 } catch (BadLocationException ex) {
165 throw new RuntimeException(ex.getMessage());
166 }
167 //Position after the newly replaced word(s)
168 //Position after the newly replaced word(s)
169 first = true;
170 currentWordPos = getNextWordStart(text, currentWordPos+newWord.length());
171 if (currentWordPos != -1) {
172 currentWordEnd = getNextWordEnd(text, currentWordPos);
173 nextWordPos = getNextWordStart(text, currentWordEnd);
174 sentanceIterator.setText(text);
175 sentanceIterator.following(currentWordPos);
176 } else moreTokens = false;
177 }
178 }
179
180 /** Returns the current text that is being tokenized (includes any changes
181 * that have been made)
182 */
183 public String getContext() {
184 return text.toString();
185 }
186
187 /** Returns true iif the current word is at the start of a sentance*/
188 public boolean isNewSentance() {
189 return startsSentance;
190 }
191
192 }