1 package com
.swabunga
.spell
.event
;
import java.text.BreakIterator;

import javax.swing.text.AttributeSet;
import javax.swing.text.BadLocationException;
import javax.swing.text.Document;
import javax.swing.text.Element;
import javax.swing.text.Segment;

11 /** This class tokenizes a swing document model. It also allows for the
12 * document model to be changed when corrections occur.
14 * @author Jason Height (jheight@chariot.net.au)
16 public class DocumentWordTokenizer
implements WordTokenizer
{
17 /** Holds the start character position of the current word*/
18 private int currentWordPos
= 0;
19 /** Holds the end character position of the current word*/
20 private int currentWordEnd
= 0;
21 /** Holds the start character position of the next word*/
22 private int nextWordPos
= -1;
23 /** The actual text that is being tokenized*/
24 private Document document
;
25 /** The character iterator over the document*/
27 /** The cumulative word count that have been processed*/
28 private int wordCount
= 0;
29 /** Flag indicating if there are any more tokens (words) left*/
30 private boolean moreTokens
= true
;
31 /** Is this a special case where the currentWordStart, currntWordEnd and
32 * nextWordPos have already been calculated. (see nextWord)
34 private boolean first
= true
;
36 private BreakIterator sentanceIterator
;
37 private boolean startsSentance
= true
;
40 public DocumentWordTokenizer(Document document
) {
41 this.document
= document
;
42 //Create a text segment over the etire document
44 sentanceIterator
= BreakIterator
.getSentenceInstance();
46 document
.getText(0, document
.getLength(), text
);
47 sentanceIterator
.setText(text
);
48 currentWordPos
= getNextWordStart(text
, 0);
49 //If the current word pos is -1 then the string was all white space
50 if (currentWordPos
!= -1) {
51 currentWordEnd
= getNextWordEnd(text
, currentWordPos
);
52 nextWordPos
= getNextWordStart(text
, currentWordEnd
);
56 } catch (BadLocationException ex
) {
61 /** This helper method will return the start character of the next
62 * word in the buffer from the start position
64 private static int getNextWordStart(Segment text
, int startPos
) {
65 if (startPos
<= text
.getEndIndex())
66 for (char ch
= text
.setIndex(startPos
);ch
!= Segment
.DONE
;ch
= text
.next()) {
67 if (Character
.isLetterOrDigit(ch
)) {
68 return text
.getIndex();
74 /** This helper method will return the end of the next word in the buffer.
77 private static int getNextWordEnd(Segment text
, int startPos
) {
78 for (char ch
= text
.setIndex(startPos
); ch
!= Segment
.DONE
;ch
= text
.next()) {
79 if (!Character
.isLetterOrDigit(ch
)) {
80 return text
.getIndex();
83 return text
.getEndIndex();
87 /** Returns true if there are more words that can be processed in the string
90 public boolean hasMoreWords() {
94 /** Returns the current character position in the text
97 public int getCurrentWordPosition() {
98 return currentWordPos
;
101 /** Returns the current end word position in the text
104 public int getCurrentWordEnd() {
105 return currentWordEnd
;
109 /** Returns the next word in the text
112 public String
nextWord() {
114 currentWordPos
= nextWordPos
;
115 currentWordEnd
= getNextWordEnd(text
, currentWordPos
);
116 nextWordPos
= getNextWordStart(text
, currentWordEnd
+1);
117 int current
= sentanceIterator
.current();
118 if (current
== currentWordPos
)
119 startsSentance
= true
;
121 startsSentance
= false
;
122 if (currentWordEnd
> current
)
123 sentanceIterator
.next();
127 //The nextWordPos has already been populated
130 word
= document
.getText(currentWordPos
, currentWordEnd
-currentWordPos
);
131 } catch (BadLocationException ex
) {
136 if (nextWordPos
== -1)
141 /** Returns the current number of words that have been processed
144 public int getCurrentWordCount() {
148 /** Replaces the current word token*/
149 public void replaceWord(String newWord
) {
150 if (currentWordPos
!= -1) {
153 document.remove(currentWordPos, currentWordEnd - currentWordPos);
154 document.insertString(currentWordPos, newWord, null);
156 // Howard's Version for Ekit
157 Element element
= ((javax
.swing
.text
.html
.HTMLDocument
)document
).getCharacterElement(currentWordPos
);
158 AttributeSet attribs
= element
.getAttributes();
159 document
.remove(currentWordPos
, currentWordEnd
- currentWordPos
);
160 document
.insertString(currentWordPos
, newWord
, attribs
);
161 // End Howard's Version
162 //Need to reset the segment
163 document
.getText(0, document
.getLength(), text
);
164 } catch (BadLocationException ex
) {
165 throw new RuntimeException(ex
.getMessage());
167 //Position after the newly replaced word(s)
168 //Position after the newly replaced word(s)
170 currentWordPos
= getNextWordStart(text
, currentWordPos
+newWord
.length());
171 if (currentWordPos
!= -1) {
172 currentWordEnd
= getNextWordEnd(text
, currentWordPos
);
173 nextWordPos
= getNextWordStart(text
, currentWordEnd
);
174 sentanceIterator
.setText(text
);
175 sentanceIterator
.following(currentWordPos
);
176 } else moreTokens
= false
;
180 /** Returns the current text that is being tokenized (includes any changes
181 * that have been made)
183 public String
getContext() {
184 return text
.toString();
187 /** Returns true iif the current word is at the start of a sentance*/
188 public boolean isNewSentance() {
189 return startsSentance
;