Initial revision
[old-projects.git] / ekit / com / swabunga / spell / event / SpellChecker.java
1 package com.swabunga.spell.event;
2
3 import com.swabunga.spell.engine.*;
4 import java.util.*;
5
6 /**
7 * This is the main class for spell checking (using the new event based spell
8 * checking).
9 *
10 * @author Jason Height (jheight@chariot.net.au)
11 * @created 19 June 2002
12 */
13 public class SpellChecker {
14 /** Flag indicating that the Spell Check completed without any errors present*/
15 public static final int SPELLCHECK_OK=-1;
16 /** Flag indicating that the Spell Check completed due to user cancellation*/
17 public static final int SPELLCHECK_CANCEL=-2;
18
19 private List eventListeners = new ArrayList();
20 private SpellDictionary dictionary;
21
22 private Configuration config = Configuration.getConfiguration();
23
24 /**This variable holds all of the words that are to be always ignored */
25 private Set ignoredWords = new HashSet();
26 private Map autoReplaceWords = new HashMap();
27
28
29 /**
30 * Constructs the SpellChecker. The default threshold is used
31 *
32 * @param dictionary Description of the Parameter
33 */
34 public SpellChecker(SpellDictionary dictionary) {
35 if (dictionary == null) {
36 throw new IllegalArgumentException("dictionary must non-null");
37 }
38 this.dictionary = dictionary;
39 }
40
41
42 /**
43 * Constructs the SpellChecker with a threshold
44 *
45 * @param dictionary Description of the Parameter
46 * @param threshold Description of the Parameter
47 */
48 public SpellChecker(SpellDictionary dictionary, int threshold) {
49 this(dictionary);
50 config.setInteger( Configuration.SPELL_THRESHOLD, threshold );
51 }
52
53
54 /**
55 *Adds a SpellCheckListener
56 *
57 * @param listener The feature to be added to the SpellCheckListener attribute
58 */
59 public void addSpellCheckListener(SpellCheckListener listener) {
60 eventListeners.add(listener);
61 }
62
63
64 /**
65 *Removes a SpellCheckListener
66 *
67 * @param listener Description of the Parameter
68 */
69 public void removeSpellCheckListener(SpellCheckListener listener) {
70 eventListeners.remove(listener);
71 }
72
73
74 /**
75 * Fires off a spell check event to the listeners.
76 *
77 * @param event Description of the Parameter
78 */
79 protected void fireSpellCheckEvent(SpellCheckEvent event) {
80 for (int i = eventListeners.size() - 1; i >= 0; i--) {
81 ((SpellCheckListener) eventListeners.get(i)).spellingError(event);
82 }
83 }
84
85
86 /**
87 * This method clears the words that are currently being remembered as
88 * Ignore All words and Replace All words.
89 */
90 public void reset() {
91 ignoredWords.clear();
92 autoReplaceWords.clear();
93 }
94
95
96 /**
97 * Checks the text string.
98 * <p>
99 * Returns the corrected string.
100 *
101 * @param text Description of the Parameter
102 * @return Description of the Return Value
103 * @deprecated use checkSpelling(WordTokenizer)
104 */
105 public String checkString(String text) {
106 StringWordTokenizer tokens = new StringWordTokenizer(text);
107 checkSpelling(tokens);
108 return tokens.getFinalText();
109 }
110
111
112 /**
113 * Returns true iif this word contains a digit
114 *
115 * @param word Description of the Parameter
116 * @return The digitWord value
117 */
118 private final static boolean isDigitWord(String word) {
119 for (int i = word.length() - 1; i >= 0; i--) {
120 if (Character.isDigit(word.charAt(i))) {
121 return true;
122 }
123 }
124 return false;
125 }
126
127
128 /**
129 * Returns true iif this word looks like an internet address
130 *
131 * @param word Description of the Parameter
132 * @return The iNETWord value
133 */
134 private final static boolean isINETWord(String word) {
135 //JMH TBD
136 return false;
137 }
138
139
140 /**
141 * Returns true iif this word contains all upper case characters
142 *
143 * @param word Description of the Parameter
144 * @return The upperCaseWord value
145 */
146 private final static boolean isUpperCaseWord(String word) {
147 for (int i = word.length() - 1; i >= 0; i--) {
148 if (Character.isLowerCase(word.charAt(i))) {
149 return false;
150 }
151 }
152 return true;
153 }
154
155
156 /**
157 * Returns true iif this word contains mixed case characters
158 *
159 * @param word Description of the Parameter
160 * @param startsSentance True if this word is at the start of a sentance
161 * @return The mixedCaseWord value
162 */
163 private final static boolean isMixedCaseWord(String word, boolean startsSentance) {
164 int strLen = word.length();
165 boolean isUpper = Character.isUpperCase(word.charAt(0));
166 //Ignore the first character if this word starts the sentance and the first
167 //character was upper cased, since this is normal behaviour
168 if ((startsSentance) && isUpper && (strLen > 1))
169 isUpper = Character.isUpperCase(word.charAt(1));
170 if (isUpper) {
171 for (int i = word.length() - 1; i > 0; i--) {
172 if (Character.isLowerCase(word.charAt(i))) {
173 return true;
174 }
175 }
176 } else {
177 for (int i = word.length() - 1; i > 0; i--) {
178 if (Character.isUpperCase(word.charAt(i))) {
179 return true;
180 }
181 }
182 }
183 return false;
184 }
185
186
187 /**
188 * This method will fire the spell check event and then handle the event
189 * action that has been selected by the user.
190 *
191 * @param tokenizer Description of the Parameter
192 * @param event Description of the Parameter
193 * @return Returns true if the event action is to cancel the current spell checking, false if the spell checking should continue
194 */
195 protected boolean fireAndHandleEvent(WordTokenizer tokenizer, SpellCheckEvent event) {
196 fireSpellCheckEvent(event);
197 String word = event.getInvalidWord();
198 //Work out what to do in response to the event.
199 switch (event.getAction()) {
200 case SpellCheckEvent.INITIAL:
201 break;
202 case SpellCheckEvent.IGNORE:
203 break;
204 case SpellCheckEvent.IGNOREALL:
205 if (!ignoredWords.contains(word)) {
206 ignoredWords.add(word);
207 }
208 break;
209 case SpellCheckEvent.REPLACE:
210 tokenizer.replaceWord(event.getReplaceWord());
211 break;
212 case SpellCheckEvent.REPLACEALL:
213 String replaceAllWord = event.getReplaceWord();
214 if (!autoReplaceWords.containsKey(word)) {
215 autoReplaceWords.put(word, replaceAllWord);
216 }
217 tokenizer.replaceWord(replaceAllWord);
218 break;
219 case SpellCheckEvent.ADDTODICT:
220 String addWord = event.getReplaceWord();
221 tokenizer.replaceWord(addWord);
222 dictionary.addWord(addWord);
223 break;
224 case SpellCheckEvent.CANCEL:
225 return true;
226 default:
227 throw new IllegalArgumentException("Unhandled case.");
228 }
229 return false;
230 }
231
232
233 /**
234 * This method is called to check the spelling of the words that are returned
235 * by the WordTokenizer.
236 * <p>For each invalid word the action listeners will be informed with a new SpellCheckEvent</p>
237 *
238 * @param tokenizer Description of the Parameter
239 * @return Either SPELLCHECK_OK, SPELLCHECK_CANCEL or the number of errors found. The number of errors are those that are found BEFORE and corretions are made.
240 */
241 public final int checkSpelling(WordTokenizer tokenizer) {
242 int errors = 0;
243 boolean terminated = false;
244 //Keep track of the previous word
245 String previousWord = null;
246 while (tokenizer.hasMoreWords() && !terminated) {
247 String word = tokenizer.nextWord();
248 //Check the spelling of the word
249 if (!dictionary.isCorrect(word)) {
250 if (
251 (config.getBoolean(Configuration.SPELL_IGNOREMIXEDCASE) && isMixedCaseWord(word, tokenizer.isNewSentance())) ||
252 (config.getBoolean(Configuration.SPELL_IGNOREUPPERCASE) && isUpperCaseWord(word)) ||
253 (config.getBoolean(Configuration.SPELL_IGNOREDIGITWORDS) && isDigitWord(word)) ||
254 (config.getBoolean(Configuration.SPELL_IGNOREINTERNETADDRESSES) && isINETWord(word))) {
255 //Null event. Since we are ignoring this word due
256 //to one of the above cases.
257 } else {
258 //We cant ignore this misspelt word
259 //For this invalid word are we ignoreing the misspelling?
260 if (!ignoredWords.contains(word)) {
261 errors++;
262 //Is this word being automagically replaced
263 if (autoReplaceWords.containsKey(word)) {
264 tokenizer.replaceWord((String) autoReplaceWords.get(word));
265 } else {
266 //JMH Need to somehow capitalise the suggestions if
267 //ignoreSentanceCapitalisation is not set to true
268 //Fire the event.
269 SpellCheckEvent event = new BasicSpellCheckEvent(word, dictionary.getSuggestions(word,
270 config.getInteger(Configuration.SPELL_THRESHOLD)), tokenizer);
271 terminated = fireAndHandleEvent(tokenizer, event);
272 }
273 }
274 }
275 } else {
276 //This is a correctly spelt word. However perform some extra checks
277 /*
278 * JMH TBD //Check for multiple words
279 * if (!ignoreMultipleWords &&) {
280 * }
281 */
282 //Check for capitalisation
283 if ((!config.getBoolean(Configuration.SPELL_IGNORESENTANCECAPITALIZATION)) && (tokenizer.isNewSentance())
284 && (Character.isLowerCase(word.charAt(0)))) {
285 errors++;
286 StringBuffer buf = new StringBuffer(word);
287 buf.setCharAt(0, Character.toUpperCase(word.charAt(0)));
288 List suggestion = new LinkedList();
289 suggestion.add(new Word(buf.toString(), 0));
290 SpellCheckEvent event = new BasicSpellCheckEvent(word, suggestion,
291 tokenizer);
292 terminated = fireAndHandleEvent(tokenizer, event);
293 }
294 }
295 }
296 if (terminated)
297 return SPELLCHECK_CANCEL;
298 else if (errors == 0)
299 return SPELLCHECK_OK;
300 else return errors;
301 }
302 }
303
304