| 1 | package com.swabunga.spell.event;\r\r |
| 2 | \r\r |
| 3 | import com.swabunga.spell.engine.*;\r\r |
| 4 | import java.util.*;\r\r |
| 5 | \r\r |
| 6 | /**\r\r |
| 7 | * This is the main class for spell checking (using the new event based spell\r\r |
| 8 | * checking).\r\r |
| 9 | *\r\r |
| 10 | * @author Jason Height (jheight@chariot.net.au)\r\r |
| 11 | * @created 19 June 2002\r\r |
| 12 | */\r\r |
| 13 | public class SpellChecker {\r\r |
| 14 | /** Flag indicating that the Spell Check completed without any errors present*/\r\r |
| 15 | public static final int SPELLCHECK_OK=-1;\r\r |
| 16 | /** Flag indicating that the Spell Check completed due to user cancellation*/\r\r |
| 17 | public static final int SPELLCHECK_CANCEL=-2;\r\r |
| 18 | \r\r |
| 19 | private List eventListeners = new ArrayList();\r\r |
| 20 | private SpellDictionary dictionary;\r\r |
| 21 | \r\r |
| 22 | private Configuration config = Configuration.getConfiguration();\r\r |
| 23 | \r\r |
| 24 | /**This variable holds all of the words that are to be always ignored */\r\r |
| 25 | private Set ignoredWords = new HashSet();\r\r |
| 26 | private Map autoReplaceWords = new HashMap();\r\r |
| 27 | \r\r |
| 28 | \r\r |
| 29 | /**\r\r |
| 30 | * Constructs the SpellChecker. The default threshold is used\r\r |
| 31 | *\r\r |
| 32 | * @param dictionary Description of the Parameter\r\r |
| 33 | */\r\r |
| 34 | public SpellChecker(SpellDictionary dictionary) {\r\r |
| 35 | if (dictionary == null) {\r\r |
| 36 | throw new IllegalArgumentException("dictionary must non-null");\r\r |
| 37 | }\r\r |
| 38 | this.dictionary = dictionary;\r\r |
| 39 | }\r\r |
| 40 | \r\r |
| 41 | \r\r |
| 42 | /**\r\r |
| 43 | * Constructs the SpellChecker with a threshold\r\r |
| 44 | *\r\r |
| 45 | * @param dictionary Description of the Parameter\r\r |
| 46 | * @param threshold Description of the Parameter\r\r |
| 47 | */\r\r |
| 48 | public SpellChecker(SpellDictionary dictionary, int threshold) {\r\r |
| 49 | this(dictionary);\r\r |
| 50 | config.setInteger( Configuration.SPELL_THRESHOLD, threshold );\r\r |
| 51 | }\r\r |
| 52 | \r\r |
| 53 | \r\r |
| 54 | /**\r\r |
| 55 | *Adds a SpellCheckListener\r\r |
| 56 | *\r\r |
| 57 | * @param listener The feature to be added to the SpellCheckListener attribute\r\r |
| 58 | */\r\r |
| 59 | public void addSpellCheckListener(SpellCheckListener listener) {\r\r |
| 60 | eventListeners.add(listener);\r\r |
| 61 | }\r\r |
| 62 | \r\r |
| 63 | \r\r |
| 64 | /**\r\r |
| 65 | *Removes a SpellCheckListener\r\r |
| 66 | *\r\r |
| 67 | * @param listener Description of the Parameter\r\r |
| 68 | */\r\r |
| 69 | public void removeSpellCheckListener(SpellCheckListener listener) {\r\r |
| 70 | eventListeners.remove(listener);\r\r |
| 71 | }\r\r |
| 72 | \r\r |
| 73 | \r\r |
| 74 | /**\r\r |
| 75 | * Fires off a spell check event to the listeners.\r\r |
| 76 | *\r\r |
| 77 | * @param event Description of the Parameter\r\r |
| 78 | */\r\r |
| 79 | protected void fireSpellCheckEvent(SpellCheckEvent event) {\r\r |
| 80 | for (int i = eventListeners.size() - 1; i >= 0; i--) {\r\r |
| 81 | ((SpellCheckListener) eventListeners.get(i)).spellingError(event);\r\r |
| 82 | }\r\r |
| 83 | }\r\r |
| 84 | \r\r |
| 85 | \r\r |
| 86 | /**\r\r |
| 87 | * This method clears the words that are currently being remembered as\r\r |
| 88 | * Ignore All words and Replace All words.\r\r |
| 89 | */\r\r |
| 90 | public void reset() {\r\r |
| 91 | ignoredWords.clear();\r\r |
| 92 | autoReplaceWords.clear();\r\r |
| 93 | }\r\r |
| 94 | \r\r |
| 95 | \r\r |
| 96 | /**\r\r |
| 97 | * Checks the text string.\r\r |
| 98 | * <p>\r\r |
| 99 | * Returns the corrected string.\r\r |
| 100 | *\r\r |
| 101 | * @param text Description of the Parameter\r\r |
| 102 | * @return Description of the Return Value\r\r |
| 103 | * @deprecated use checkSpelling(WordTokenizer)\r\r |
| 104 | */\r\r |
| 105 | public String checkString(String text) {\r\r |
| 106 | StringWordTokenizer tokens = new StringWordTokenizer(text);\r\r |
| 107 | checkSpelling(tokens);\r\r |
| 108 | return tokens.getFinalText();\r\r |
| 109 | }\r\r |
| 110 | \r\r |
| 111 | \r\r |
| 112 | /**\r\r |
| 113 | * Returns true iif this word contains a digit\r\r |
| 114 | *\r\r |
| 115 | * @param word Description of the Parameter\r\r |
| 116 | * @return The digitWord value\r\r |
| 117 | */\r\r |
| 118 | private final static boolean isDigitWord(String word) {\r\r |
| 119 | for (int i = word.length() - 1; i >= 0; i--) {\r\r |
| 120 | if (Character.isDigit(word.charAt(i))) {\r\r |
| 121 | return true;\r\r |
| 122 | }\r\r |
| 123 | }\r\r |
| 124 | return false;\r\r |
| 125 | }\r\r |
| 126 | \r\r |
| 127 | \r\r |
| 128 | /**\r\r |
| 129 | * Returns true iif this word looks like an internet address\r\r |
| 130 | *\r\r |
| 131 | * @param word Description of the Parameter\r\r |
| 132 | * @return The iNETWord value\r\r |
| 133 | */\r\r |
| 134 | private final static boolean isINETWord(String word) {\r\r |
| 135 | //JMH TBD\r\r |
| 136 | return false;\r\r |
| 137 | }\r\r |
| 138 | \r\r |
| 139 | \r\r |
| 140 | /**\r\r |
| 141 | * Returns true iif this word contains all upper case characters\r\r |
| 142 | *\r\r |
| 143 | * @param word Description of the Parameter\r\r |
| 144 | * @return The upperCaseWord value\r\r |
| 145 | */\r\r |
| 146 | private final static boolean isUpperCaseWord(String word) {\r\r |
| 147 | for (int i = word.length() - 1; i >= 0; i--) {\r\r |
| 148 | if (Character.isLowerCase(word.charAt(i))) {\r\r |
| 149 | return false;\r\r |
| 150 | }\r\r |
| 151 | }\r\r |
| 152 | return true;\r\r |
| 153 | }\r\r |
| 154 | \r\r |
| 155 | \r\r |
| 156 | /**\r\r |
| 157 | * Returns true iif this word contains mixed case characters\r\r |
| 158 | *\r\r |
| 159 | * @param word Description of the Parameter\r\r |
| 160 | * @param startsSentance True if this word is at the start of a sentance\r\r |
| 161 | * @return The mixedCaseWord value\r\r |
| 162 | */\r\r |
| 163 | private final static boolean isMixedCaseWord(String word, boolean startsSentance) {\r\r |
| 164 | int strLen = word.length();\r\r |
| 165 | boolean isUpper = Character.isUpperCase(word.charAt(0));\r\r |
| 166 | //Ignore the first character if this word starts the sentance and the first\r\r |
| 167 | //character was upper cased, since this is normal behaviour\r\r |
| 168 | if ((startsSentance) && isUpper && (strLen > 1))\r\r |
| 169 | isUpper = Character.isUpperCase(word.charAt(1));\r\r |
| 170 | if (isUpper) {\r\r |
| 171 | for (int i = word.length() - 1; i > 0; i--) {\r\r |
| 172 | if (Character.isLowerCase(word.charAt(i))) {\r\r |
| 173 | return true;\r\r |
| 174 | }\r\r |
| 175 | }\r\r |
| 176 | } else {\r\r |
| 177 | for (int i = word.length() - 1; i > 0; i--) {\r\r |
| 178 | if (Character.isUpperCase(word.charAt(i))) {\r\r |
| 179 | return true;\r\r |
| 180 | }\r\r |
| 181 | }\r\r |
| 182 | }\r\r |
| 183 | return false;\r\r |
| 184 | }\r\r |
| 185 | \r\r |
| 186 | \r\r |
| 187 | /**\r\r |
| 188 | * This method will fire the spell check event and then handle the event\r\r |
| 189 | * action that has been selected by the user.\r\r |
| 190 | *\r\r |
| 191 | * @param tokenizer Description of the Parameter\r\r |
| 192 | * @param event Description of the Parameter\r\r |
| 193 | * @return Returns true if the event action is to cancel the current spell checking, false if the spell checking should continue\r\r |
| 194 | */\r\r |
| 195 | protected boolean fireAndHandleEvent(WordTokenizer tokenizer, SpellCheckEvent event) {\r\r |
| 196 | fireSpellCheckEvent(event);\r\r |
| 197 | String word = event.getInvalidWord();\r\r |
| 198 | //Work out what to do in response to the event.\r\r |
| 199 | switch (event.getAction()) {\r\r |
| 200 | case SpellCheckEvent.INITIAL:\r\r |
| 201 | break;\r\r |
| 202 | case SpellCheckEvent.IGNORE:\r\r |
| 203 | break;\r\r |
| 204 | case SpellCheckEvent.IGNOREALL:\r\r |
| 205 | if (!ignoredWords.contains(word)) {\r\r |
| 206 | ignoredWords.add(word);\r\r |
| 207 | }\r\r |
| 208 | break;\r\r |
| 209 | case SpellCheckEvent.REPLACE:\r\r |
| 210 | tokenizer.replaceWord(event.getReplaceWord());\r\r |
| 211 | break;\r\r |
| 212 | case SpellCheckEvent.REPLACEALL:\r\r |
| 213 | String replaceAllWord = event.getReplaceWord();\r\r |
| 214 | if (!autoReplaceWords.containsKey(word)) {\r\r |
| 215 | autoReplaceWords.put(word, replaceAllWord);\r\r |
| 216 | }\r\r |
| 217 | tokenizer.replaceWord(replaceAllWord);\r\r |
| 218 | break;\r\r |
| 219 | case SpellCheckEvent.ADDTODICT:\r\r |
| 220 | String addWord = event.getReplaceWord();\r\r |
| 221 | tokenizer.replaceWord(addWord);\r\r |
| 222 | dictionary.addWord(addWord);\r\r |
| 223 | break;\r\r |
| 224 | case SpellCheckEvent.CANCEL:\r\r |
| 225 | return true;\r\r |
| 226 | default:\r\r |
| 227 | throw new IllegalArgumentException("Unhandled case.");\r\r |
| 228 | }\r\r |
| 229 | return false;\r\r |
| 230 | }\r\r |
| 231 | \r\r |
| 232 | \r\r |
| 233 | /**\r\r |
| 234 | * This method is called to check the spelling of the words that are returned\r\r |
| 235 | * by the WordTokenizer.\r\r |
| 236 | * <p>For each invalid word the action listeners will be informed with a new SpellCheckEvent</p>\r\r |
| 237 | *\r\r |
| 238 | * @param tokenizer Description of the Parameter\r\r |
| 239 | * @return Either SPELLCHECK_OK, SPELLCHECK_CANCEL or the number of errors found. The number of errors are those that are found BEFORE and corretions are made.\r\r |
| 240 | */\r\r |
| 241 | public final int checkSpelling(WordTokenizer tokenizer) {\r\r |
| 242 | int errors = 0;\r\r |
| 243 | boolean terminated = false;\r\r |
| 244 | //Keep track of the previous word\r\r |
| 245 | String previousWord = null;\r\r |
| 246 | while (tokenizer.hasMoreWords() && !terminated) {\r\r |
| 247 | String word = tokenizer.nextWord();\r\r |
| 248 | //Check the spelling of the word\r\r |
| 249 | if (!dictionary.isCorrect(word)) {\r\r |
| 250 | if (\r\r |
| 251 | (config.getBoolean(Configuration.SPELL_IGNOREMIXEDCASE) && isMixedCaseWord(word, tokenizer.isNewSentance())) ||\r\r |
| 252 | (config.getBoolean(Configuration.SPELL_IGNOREUPPERCASE) && isUpperCaseWord(word)) ||\r\r |
| 253 | (config.getBoolean(Configuration.SPELL_IGNOREDIGITWORDS) && isDigitWord(word)) ||\r\r |
| 254 | (config.getBoolean(Configuration.SPELL_IGNOREINTERNETADDRESSES) && isINETWord(word))) {\r\r |
| 255 | //Null event. Since we are ignoring this word due\r\r |
| 256 | //to one of the above cases.\r\r |
| 257 | } else {\r\r |
| 258 | //We cant ignore this misspelt word\r\r |
| 259 | //For this invalid word are we ignoreing the misspelling?\r\r |
| 260 | if (!ignoredWords.contains(word)) {\r\r |
| 261 | errors++;\r\r |
| 262 | //Is this word being automagically replaced\r\r |
| 263 | if (autoReplaceWords.containsKey(word)) {\r\r |
| 264 | tokenizer.replaceWord((String) autoReplaceWords.get(word));\r\r |
| 265 | } else {\r\r |
| 266 | //JMH Need to somehow capitalise the suggestions if\r\r |
| 267 | //ignoreSentanceCapitalisation is not set to true\r\r |
| 268 | //Fire the event.\r\r |
| 269 | SpellCheckEvent event = new BasicSpellCheckEvent(word, dictionary.getSuggestions(word,\r\r |
| 270 | config.getInteger(Configuration.SPELL_THRESHOLD)), tokenizer);\r\r |
| 271 | terminated = fireAndHandleEvent(tokenizer, event);\r\r |
| 272 | }\r\r |
| 273 | }\r\r |
| 274 | }\r\r |
| 275 | } else {\r\r |
| 276 | //This is a correctly spelt word. However perform some extra checks\r\r |
| 277 | /*\r\r |
| 278 | * JMH TBD //Check for multiple words\r\r |
| 279 | * if (!ignoreMultipleWords &&) {\r\r |
| 280 | * }\r\r |
| 281 | */\r\r |
| 282 | //Check for capitalisation\r\r |
| 283 | if ((!config.getBoolean(Configuration.SPELL_IGNORESENTANCECAPITALIZATION)) && (tokenizer.isNewSentance())\r\r |
| 284 | && (Character.isLowerCase(word.charAt(0)))) {\r\r |
| 285 | errors++;\r\r |
| 286 | StringBuffer buf = new StringBuffer(word);\r\r |
| 287 | buf.setCharAt(0, Character.toUpperCase(word.charAt(0)));\r\r |
| 288 | List suggestion = new LinkedList();\r\r |
| 289 | suggestion.add(new Word(buf.toString(), 0));\r\r |
| 290 | SpellCheckEvent event = new BasicSpellCheckEvent(word, suggestion,\r\r |
| 291 | tokenizer);\r\r |
| 292 | terminated = fireAndHandleEvent(tokenizer, event);\r\r |
| 293 | }\r\r |
| 294 | }\r\r |
| 295 | }\r\r |
| 296 | if (terminated)\r\r |
| 297 | return SPELLCHECK_CANCEL;\r\r |
| 298 | else if (errors == 0)\r\r |
| 299 | return SPELLCHECK_OK;\r\r |
| 300 | else return errors;\r\r |
| 301 | }\r\r |
| 302 | }\r\r |
| 303 | \r\r |
| 304 | \r\r |