1 package com
.swabunga
.spell
.event
;
/**
 * This class tokenizes an input string.
 *
 * <p>It also allows for the string to be mutated. The result after the spell
 * checking is completed is available through a call to getFinalText.</p>
 *
 * @author Jason Height (jheight@chariot.net.au)
 */
13 public class StringWordTokenizer
implements WordTokenizer
{
14 /** Holds the start character position of the current word*/
15 private int currentWordPos
= 0;
16 /** Holds the end character position of the current word*/
17 private int currentWordEnd
= 0;
18 /** Holds the start character position of the next word*/
19 private int nextWordPos
= -1;
20 /** The actual text that is being tokenized*/
21 private StringBuffer text
;
22 /** The cumulative word count that have been processed*/
23 private int wordCount
= 0;
24 /** Flag indicating if there are any more tokens (words) left*/
25 private boolean moreTokens
= true
;
26 /** Is this a special case where the currentWordStart, currntWordEnd and
27 * nextWordPos have already been calculated. (see nextWord)
29 private boolean first
= true
;
31 private BreakIterator sentanceIterator
;
32 private boolean startsSentance
= true
;
/**
 * Creates a tokenizer over the given text and positions it on the first word.
 *
 * <p>The sentence iterator is set up on the same text and moved to its first
 * boundary. The text is copied into a mutable buffer, and the positions of the
 * first word and of the word following it are computed up front.</p>
 *
 * @param text the text to split into words
 */
public StringWordTokenizer(String text) {
  sentanceIterator = BreakIterator.getSentenceInstance();
  sentanceIterator.setText(text);
  sentanceIterator.first();
  // Wrap the text in a string buffer to make replacing words easier.
  this.text = new StringBuffer(text);
  currentWordPos = getNextWordStart(this.text, 0);
  // A position of -1 means the text holds no letter or digit at all.
  if (currentWordPos != -1) {
    currentWordEnd = getNextWordEnd(this.text, currentWordPos);
    nextWordPos = getNextWordStart(this.text, currentWordEnd);
  } else {
    // NOTE(review): branch restored to match the identical pattern in
    // replaceWord; without it hasMoreWords would stay true for blank text.
    moreTokens = false;
  }
}
52 /** This helper method will return the start character of the next
53 * word in the buffer from the start position
55 private static int getNextWordStart(StringBuffer text
, int startPos
) {
56 int size
= text
.length();
57 for (int i
=startPos
;i
<size
;i
++) {
58 if (Character
.isLetterOrDigit(text
.charAt(i
))) {
65 /** This helper method will return the end of the next word in the buffer.
68 private static int getNextWordEnd(StringBuffer text
, int startPos
) {
69 int size
= text
.length();
70 for (int i
=startPos
;i
<size
;i
++) {
71 if (!Character
.isLetterOrDigit(text
.charAt(i
))) {
79 /** Returns true if there are more words that can be processed in the string
82 public boolean hasMoreWords() {
86 /** Returns the current character position in the text
89 public int getCurrentWordPosition() {
90 return currentWordPos
;
93 /** Returns the current end word position in the text
96 public int getCurrentWordEnd() {
97 return currentWordEnd
;
100 /** Returns the next word in the text
103 public String
nextWord() {
105 currentWordPos
= nextWordPos
;
106 currentWordEnd
= getNextWordEnd(text
, currentWordPos
);
107 nextWordPos
= getNextWordStart(text
, currentWordEnd
+1);
108 int current
= sentanceIterator
.current();
109 if (current
== currentWordPos
)
110 startsSentance
= true
;
112 startsSentance
= false
;
113 if (currentWordEnd
> current
)
114 sentanceIterator
.next();
117 //The nextWordPos has already been populated
118 String word
= text
.substring(currentWordPos
, currentWordEnd
);
121 if (nextWordPos
== -1)
126 /** Returns the current number of words that have been processed
129 public int getCurrentWordCount() {
133 /** Replaces the current word token*/
134 public void replaceWord(String newWord
) {
135 if (currentWordPos
!= -1) {
136 text
.replace(currentWordPos
, currentWordEnd
, newWord
);
137 //Position after the newly replaced word(s)
139 currentWordPos
= getNextWordStart(text
, currentWordPos
+newWord
.length());
140 if (currentWordPos
!= -1) {
141 currentWordEnd
= getNextWordEnd(text
, currentWordPos
);
142 nextWordPos
= getNextWordStart(text
, currentWordEnd
);
143 sentanceIterator
.setText(text
.toString());
144 sentanceIterator
.following(currentWordPos
);
145 } else moreTokens
= false
;
149 /** returns true iif the current word is at the start of a sentance*/
150 public boolean isNewSentance() {
151 return startsSentance
;
154 /** Returns the current text that is being tokenized (includes any changes
155 * that have been made)
157 public String
getContext() {
158 return text
.toString();
161 /** This method can be used to return the final text after the schecking is complete.*/
162 public String
getFinalText() {
167 public static void main(String args
[]) {
168 StringWordTokenizer t
= new StringWordTokenizer(" This is a test problem");
169 while(t
.hasMoreWords()) {
170 String word
= t
.nextWord();
171 System
.out
.println("Word is '"+word
+"'");
172 if ("test".equals(word
)) t
.replaceWord("mightly big");
174 System
.out
.println("End text is: '"+t
.getFinalText()+"'");
176 t
= new StringWordTokenizer(" README ");
177 while(t
.hasMoreWords()) {
178 String word
= t
.nextWord();
179 System
.out
.println("Word is '"+word
+"'");
181 System
.out
.println("End text is: '"+t
.getFinalText()+"'");
183 t
= new StringWordTokenizer("This is a acronym (A.C.M.E). This is the second sentance.");
184 while(t
.hasMoreWords()) {
185 String word
= t
.nextWord();
186 System
.out
.println("Word is '"+word
+"'. Starts Sentance?="+t
.isNewSentance());
187 if (word
.equals("acronym"))
188 t
.replaceWord("test");