Commit | Line | Data |
---|---|---|
6dd70280 JL |
1 | package com.swabunga.spell.event;\r\r |
2 | \r\r | |
3 | import com.swabunga.spell.engine.*;\r\r | |
4 | import java.util.*;\r\r | |
5 | \r\r | |
6 | /**\r\r | |
7 | * This is the main class for spell checking (using the new event based spell\r\r | |
8 | * checking).\r\r | |
9 | *\r\r | |
10 | * @author Jason Height (jheight@chariot.net.au)\r\r | |
11 | * @created 19 June 2002\r\r | |
12 | */\r\r | |
13 | public class SpellChecker {\r\r | |
14 | /** Flag indicating that the Spell Check completed without any errors present*/\r\r | |
15 | public static final int SPELLCHECK_OK=-1;\r\r | |
16 | /** Flag indicating that the Spell Check completed due to user cancellation*/\r\r | |
17 | public static final int SPELLCHECK_CANCEL=-2;\r\r | |
18 | \r\r | |
19 | private List eventListeners = new ArrayList();\r\r | |
20 | private SpellDictionary dictionary;\r\r | |
21 | \r\r | |
22 | private Configuration config = Configuration.getConfiguration();\r\r | |
23 | \r\r | |
24 | /**This variable holds all of the words that are to be always ignored */\r\r | |
25 | private Set ignoredWords = new HashSet();\r\r | |
26 | private Map autoReplaceWords = new HashMap();\r\r | |
27 | \r\r | |
28 | \r\r | |
29 | /**\r\r | |
30 | * Constructs the SpellChecker. The default threshold is used\r\r | |
31 | *\r\r | |
32 | * @param dictionary Description of the Parameter\r\r | |
33 | */\r\r | |
34 | public SpellChecker(SpellDictionary dictionary) {\r\r | |
35 | if (dictionary == null) {\r\r | |
36 | throw new IllegalArgumentException("dictionary must non-null");\r\r | |
37 | }\r\r | |
38 | this.dictionary = dictionary;\r\r | |
39 | }\r\r | |
40 | \r\r | |
41 | \r\r | |
42 | /**\r\r | |
43 | * Constructs the SpellChecker with a threshold\r\r | |
44 | *\r\r | |
45 | * @param dictionary Description of the Parameter\r\r | |
46 | * @param threshold Description of the Parameter\r\r | |
47 | */\r\r | |
48 | public SpellChecker(SpellDictionary dictionary, int threshold) {\r\r | |
49 | this(dictionary);\r\r | |
50 | config.setInteger( Configuration.SPELL_THRESHOLD, threshold );\r\r | |
51 | }\r\r | |
52 | \r\r | |
53 | \r\r | |
54 | /**\r\r | |
55 | *Adds a SpellCheckListener\r\r | |
56 | *\r\r | |
57 | * @param listener The feature to be added to the SpellCheckListener attribute\r\r | |
58 | */\r\r | |
59 | public void addSpellCheckListener(SpellCheckListener listener) {\r\r | |
60 | eventListeners.add(listener);\r\r | |
61 | }\r\r | |
62 | \r\r | |
63 | \r\r | |
64 | /**\r\r | |
65 | *Removes a SpellCheckListener\r\r | |
66 | *\r\r | |
67 | * @param listener Description of the Parameter\r\r | |
68 | */\r\r | |
69 | public void removeSpellCheckListener(SpellCheckListener listener) {\r\r | |
70 | eventListeners.remove(listener);\r\r | |
71 | }\r\r | |
72 | \r\r | |
73 | \r\r | |
74 | /**\r\r | |
75 | * Fires off a spell check event to the listeners.\r\r | |
76 | *\r\r | |
77 | * @param event Description of the Parameter\r\r | |
78 | */\r\r | |
79 | protected void fireSpellCheckEvent(SpellCheckEvent event) {\r\r | |
80 | for (int i = eventListeners.size() - 1; i >= 0; i--) {\r\r | |
81 | ((SpellCheckListener) eventListeners.get(i)).spellingError(event);\r\r | |
82 | }\r\r | |
83 | }\r\r | |
84 | \r\r | |
85 | \r\r | |
86 | /**\r\r | |
87 | * This method clears the words that are currently being remembered as\r\r | |
88 | * Ignore All words and Replace All words.\r\r | |
89 | */\r\r | |
90 | public void reset() {\r\r | |
91 | ignoredWords.clear();\r\r | |
92 | autoReplaceWords.clear();\r\r | |
93 | }\r\r | |
94 | \r\r | |
95 | \r\r | |
96 | /**\r\r | |
97 | * Checks the text string.\r\r | |
98 | * <p>\r\r | |
99 | * Returns the corrected string.\r\r | |
100 | *\r\r | |
101 | * @param text Description of the Parameter\r\r | |
102 | * @return Description of the Return Value\r\r | |
103 | * @deprecated use checkSpelling(WordTokenizer)\r\r | |
104 | */\r\r | |
105 | public String checkString(String text) {\r\r | |
106 | StringWordTokenizer tokens = new StringWordTokenizer(text);\r\r | |
107 | checkSpelling(tokens);\r\r | |
108 | return tokens.getFinalText();\r\r | |
109 | }\r\r | |
110 | \r\r | |
111 | \r\r | |
112 | /**\r\r | |
113 | * Returns true iif this word contains a digit\r\r | |
114 | *\r\r | |
115 | * @param word Description of the Parameter\r\r | |
116 | * @return The digitWord value\r\r | |
117 | */\r\r | |
118 | private final static boolean isDigitWord(String word) {\r\r | |
119 | for (int i = word.length() - 1; i >= 0; i--) {\r\r | |
120 | if (Character.isDigit(word.charAt(i))) {\r\r | |
121 | return true;\r\r | |
122 | }\r\r | |
123 | }\r\r | |
124 | return false;\r\r | |
125 | }\r\r | |
126 | \r\r | |
127 | \r\r | |
128 | /**\r\r | |
129 | * Returns true iif this word looks like an internet address\r\r | |
130 | *\r\r | |
131 | * @param word Description of the Parameter\r\r | |
132 | * @return The iNETWord value\r\r | |
133 | */\r\r | |
134 | private final static boolean isINETWord(String word) {\r\r | |
135 | //JMH TBD\r\r | |
136 | return false;\r\r | |
137 | }\r\r | |
138 | \r\r | |
139 | \r\r | |
140 | /**\r\r | |
141 | * Returns true iif this word contains all upper case characters\r\r | |
142 | *\r\r | |
143 | * @param word Description of the Parameter\r\r | |
144 | * @return The upperCaseWord value\r\r | |
145 | */\r\r | |
146 | private final static boolean isUpperCaseWord(String word) {\r\r | |
147 | for (int i = word.length() - 1; i >= 0; i--) {\r\r | |
148 | if (Character.isLowerCase(word.charAt(i))) {\r\r | |
149 | return false;\r\r | |
150 | }\r\r | |
151 | }\r\r | |
152 | return true;\r\r | |
153 | }\r\r | |
154 | \r\r | |
155 | \r\r | |
156 | /**\r\r | |
157 | * Returns true iif this word contains mixed case characters\r\r | |
158 | *\r\r | |
159 | * @param word Description of the Parameter\r\r | |
160 | * @param startsSentance True if this word is at the start of a sentance\r\r | |
161 | * @return The mixedCaseWord value\r\r | |
162 | */\r\r | |
163 | private final static boolean isMixedCaseWord(String word, boolean startsSentance) {\r\r | |
164 | int strLen = word.length();\r\r | |
165 | boolean isUpper = Character.isUpperCase(word.charAt(0));\r\r | |
166 | //Ignore the first character if this word starts the sentance and the first\r\r | |
167 | //character was upper cased, since this is normal behaviour\r\r | |
168 | if ((startsSentance) && isUpper && (strLen > 1))\r\r | |
169 | isUpper = Character.isUpperCase(word.charAt(1));\r\r | |
170 | if (isUpper) {\r\r | |
171 | for (int i = word.length() - 1; i > 0; i--) {\r\r | |
172 | if (Character.isLowerCase(word.charAt(i))) {\r\r | |
173 | return true;\r\r | |
174 | }\r\r | |
175 | }\r\r | |
176 | } else {\r\r | |
177 | for (int i = word.length() - 1; i > 0; i--) {\r\r | |
178 | if (Character.isUpperCase(word.charAt(i))) {\r\r | |
179 | return true;\r\r | |
180 | }\r\r | |
181 | }\r\r | |
182 | }\r\r | |
183 | return false;\r\r | |
184 | }\r\r | |
185 | \r\r | |
186 | \r\r | |
187 | /**\r\r | |
188 | * This method will fire the spell check event and then handle the event\r\r | |
189 | * action that has been selected by the user.\r\r | |
190 | *\r\r | |
191 | * @param tokenizer Description of the Parameter\r\r | |
192 | * @param event Description of the Parameter\r\r | |
193 | * @return Returns true if the event action is to cancel the current spell checking, false if the spell checking should continue\r\r | |
194 | */\r\r | |
195 | protected boolean fireAndHandleEvent(WordTokenizer tokenizer, SpellCheckEvent event) {\r\r | |
196 | fireSpellCheckEvent(event);\r\r | |
197 | String word = event.getInvalidWord();\r\r | |
198 | //Work out what to do in response to the event.\r\r | |
199 | switch (event.getAction()) {\r\r | |
200 | case SpellCheckEvent.INITIAL:\r\r | |
201 | break;\r\r | |
202 | case SpellCheckEvent.IGNORE:\r\r | |
203 | break;\r\r | |
204 | case SpellCheckEvent.IGNOREALL:\r\r | |
205 | if (!ignoredWords.contains(word)) {\r\r | |
206 | ignoredWords.add(word);\r\r | |
207 | }\r\r | |
208 | break;\r\r | |
209 | case SpellCheckEvent.REPLACE:\r\r | |
210 | tokenizer.replaceWord(event.getReplaceWord());\r\r | |
211 | break;\r\r | |
212 | case SpellCheckEvent.REPLACEALL:\r\r | |
213 | String replaceAllWord = event.getReplaceWord();\r\r | |
214 | if (!autoReplaceWords.containsKey(word)) {\r\r | |
215 | autoReplaceWords.put(word, replaceAllWord);\r\r | |
216 | }\r\r | |
217 | tokenizer.replaceWord(replaceAllWord);\r\r | |
218 | break;\r\r | |
219 | case SpellCheckEvent.ADDTODICT:\r\r | |
220 | String addWord = event.getReplaceWord();\r\r | |
221 | tokenizer.replaceWord(addWord);\r\r | |
222 | dictionary.addWord(addWord);\r\r | |
223 | break;\r\r | |
224 | case SpellCheckEvent.CANCEL:\r\r | |
225 | return true;\r\r | |
226 | default:\r\r | |
227 | throw new IllegalArgumentException("Unhandled case.");\r\r | |
228 | }\r\r | |
229 | return false;\r\r | |
230 | }\r\r | |
231 | \r\r | |
232 | \r\r | |
233 | /**\r\r | |
234 | * This method is called to check the spelling of the words that are returned\r\r | |
235 | * by the WordTokenizer.\r\r | |
236 | * <p>For each invalid word the action listeners will be informed with a new SpellCheckEvent</p>\r\r | |
237 | *\r\r | |
238 | * @param tokenizer Description of the Parameter\r\r | |
239 | * @return Either SPELLCHECK_OK, SPELLCHECK_CANCEL or the number of errors found. The number of errors are those that are found BEFORE and corretions are made.\r\r | |
240 | */\r\r | |
241 | public final int checkSpelling(WordTokenizer tokenizer) {\r\r | |
242 | int errors = 0;\r\r | |
243 | boolean terminated = false;\r\r | |
244 | //Keep track of the previous word\r\r | |
245 | String previousWord = null;\r\r | |
246 | while (tokenizer.hasMoreWords() && !terminated) {\r\r | |
247 | String word = tokenizer.nextWord();\r\r | |
248 | //Check the spelling of the word\r\r | |
249 | if (!dictionary.isCorrect(word)) {\r\r | |
250 | if (\r\r | |
251 | (config.getBoolean(Configuration.SPELL_IGNOREMIXEDCASE) && isMixedCaseWord(word, tokenizer.isNewSentance())) ||\r\r | |
252 | (config.getBoolean(Configuration.SPELL_IGNOREUPPERCASE) && isUpperCaseWord(word)) ||\r\r | |
253 | (config.getBoolean(Configuration.SPELL_IGNOREDIGITWORDS) && isDigitWord(word)) ||\r\r | |
254 | (config.getBoolean(Configuration.SPELL_IGNOREINTERNETADDRESSES) && isINETWord(word))) {\r\r | |
255 | //Null event. Since we are ignoring this word due\r\r | |
256 | //to one of the above cases.\r\r | |
257 | } else {\r\r | |
258 | //We cant ignore this misspelt word\r\r | |
259 | //For this invalid word are we ignoreing the misspelling?\r\r | |
260 | if (!ignoredWords.contains(word)) {\r\r | |
261 | errors++;\r\r | |
262 | //Is this word being automagically replaced\r\r | |
263 | if (autoReplaceWords.containsKey(word)) {\r\r | |
264 | tokenizer.replaceWord((String) autoReplaceWords.get(word));\r\r | |
265 | } else {\r\r | |
266 | //JMH Need to somehow capitalise the suggestions if\r\r | |
267 | //ignoreSentanceCapitalisation is not set to true\r\r | |
268 | //Fire the event.\r\r | |
269 | SpellCheckEvent event = new BasicSpellCheckEvent(word, dictionary.getSuggestions(word,\r\r | |
270 | config.getInteger(Configuration.SPELL_THRESHOLD)), tokenizer);\r\r | |
271 | terminated = fireAndHandleEvent(tokenizer, event);\r\r | |
272 | }\r\r | |
273 | }\r\r | |
274 | }\r\r | |
275 | } else {\r\r | |
276 | //This is a correctly spelt word. However perform some extra checks\r\r | |
277 | /*\r\r | |
278 | * JMH TBD //Check for multiple words\r\r | |
279 | * if (!ignoreMultipleWords &&) {\r\r | |
280 | * }\r\r | |
281 | */\r\r | |
282 | //Check for capitalisation\r\r | |
283 | if ((!config.getBoolean(Configuration.SPELL_IGNORESENTANCECAPITALIZATION)) && (tokenizer.isNewSentance())\r\r | |
284 | && (Character.isLowerCase(word.charAt(0)))) {\r\r | |
285 | errors++;\r\r | |
286 | StringBuffer buf = new StringBuffer(word);\r\r | |
287 | buf.setCharAt(0, Character.toUpperCase(word.charAt(0)));\r\r | |
288 | List suggestion = new LinkedList();\r\r | |
289 | suggestion.add(new Word(buf.toString(), 0));\r\r | |
290 | SpellCheckEvent event = new BasicSpellCheckEvent(word, suggestion,\r\r | |
291 | tokenizer);\r\r | |
292 | terminated = fireAndHandleEvent(tokenizer, event);\r\r | |
293 | }\r\r | |
294 | }\r\r | |
295 | }\r\r | |
296 | if (terminated)\r\r | |
297 | return SPELLCHECK_CANCEL;\r\r | |
298 | else if (errors == 0)\r\r | |
299 | return SPELLCHECK_OK;\r\r | |
300 | else return errors;\r\r | |
301 | }\r\r | |
302 | }\r\r | |
303 | \r\r | |
304 | \r\r |