ekit/com/swabunga/spell/event/DocumentWordTokenizer.java

   1 package com.swabunga.spell.event;
   2
   3 import java.util.*;
   4 import java.text.*;
   5 import javax.swing.text.AttributeSet;
   6 import javax.swing.text.Document;
   7 import javax.swing.text.Element;
   8 import javax.swing.text.Segment;
   9 import javax.swing.text.BadLocationException;
  10
  11 /** This class tokenizes a swing document model. It also allows for the
  12  *  document model to be changed when corrections occur.
  13  *
  14  * @author Jason Height (jheight@chariot.net.au)
  15  */
  16 public class DocumentWordTokenizer implements WordTokenizer {
  17   /** Holds the start character position of the current word*/
  18   private int currentWordPos = 0;
  19   /** Holds the end character position of the current word*/
  20   private int currentWordEnd = 0;
  21   /** Holds the start character position of the next word*/
  22   private int nextWordPos = -1;
  23   /** The actual text that is being tokenized*/
  24   private Document document;
  25   /** The character iterator over the document*/
  26   private Segment text;
  27   /** The cumulative word count that have been processed*/
  28   private int wordCount = 0;
  29   /** Flag indicating if there are any more tokens (words) left*/
  30   private boolean moreTokens = true;
  31   /** Is this a special case where the currentWordStart, currntWordEnd and
  32    *  nextWordPos have already been calculated. (see nextWord)
  33    */
  34   private boolean first = true;
  35
  36   private BreakIterator sentanceIterator;
  37   private boolean startsSentance = true;
  38
  39
  40   public DocumentWordTokenizer(Document document) {
  41     this.document = document;
  42     //Create a text segment over the etire document
  43     text = new Segment();
  44     sentanceIterator = BreakIterator.getSentenceInstance();
  45     try {
  46       document.getText(0, document.getLength(), text);
  47       sentanceIterator.setText(text);
  48       currentWordPos = getNextWordStart(text, 0);
  49       //If the current word pos is -1 then the string was all white space
  50       if (currentWordPos != -1) {
  51         currentWordEnd = getNextWordEnd(text, currentWordPos);
  52         nextWordPos = getNextWordStart(text, currentWordEnd);
  53       } else {
  54         moreTokens = false;
  55       }
  56     } catch (BadLocationException ex) {
  57       moreTokens = false;
  58     }
  59   }
  60
  61   /** This helper method will return the start character of the next
  62    * word in the buffer from the start position
  63    */
  64   private static int getNextWordStart(Segment text, int startPos) {
  65     if (startPos <= text.getEndIndex())
  66       for (char ch = text.setIndex(startPos);ch != Segment.DONE;ch = text.next()) {
  67         if (Character.isLetterOrDigit(ch)) {
  68           return text.getIndex();
  69         }
  70       }
  71     return -1;
  72   }
  73
  74   /** This helper method will return the end of the next word in the buffer.
  75    *
  76    */
  77   private static int getNextWordEnd(Segment text, int startPos) {
  78     for (char ch = text.setIndex(startPos); ch != Segment.DONE;ch = text.next()) {
  79       if (!Character.isLetterOrDigit(ch)) {
  80         return text.getIndex();
  81       }
  82     }
  83     return text.getEndIndex();
  84   }
  85
  86
  87   /** Returns true if there are more words that can be processed in the string
  88    *
  89    */
  90   public boolean hasMoreWords() {
  91     return moreTokens;
  92   }
  93
  94   /** Returns the current character position in the text
  95    *
  96    */
  97   public int getCurrentWordPosition() {
  98     return currentWordPos;
  99   }
 100
 101   /** Returns the current end word position in the text
 102    *
 103    */
 104   public int getCurrentWordEnd() {
 105     return currentWordEnd;
 106   }
 107
 108
 109   /** Returns the next word in the text
 110    *
 111    */
 112   public String nextWord() {
 113     if (!first) {
 114       currentWordPos = nextWordPos;
 115       currentWordEnd = getNextWordEnd(text, currentWordPos);
 116       nextWordPos = getNextWordStart(text, currentWordEnd+1);
 117       int current = sentanceIterator.current();
 118       if (current == currentWordPos)
 119         startsSentance = true;
 120       else {
 121         startsSentance = false;
 122         if (currentWordEnd > current)
 123           sentanceIterator.next();
 124       }
 125
 126     }
 127     //The nextWordPos has already been populated
 128     String word = null;
 129     try {
 130       word = document.getText(currentWordPos, currentWordEnd-currentWordPos);
 131     } catch (BadLocationException ex) {
 132       moreTokens = false;
 133     }
 134     wordCount++;
 135     first = false;
 136     if (nextWordPos == -1)
 137       moreTokens = false;
 138     return word;
 139   }
 140
 141   /** Returns the current number of words that have been processed
 142    *
 143    */
 144   public int getCurrentWordCount() {
 145     return wordCount;
 146   }
 147
 148   /** Replaces the current word token*/
 149   public void replaceWord(String newWord) {
 150     if (currentWordPos != -1) {
 151       try {
 152       /* ORIGINAL
 153         document.remove(currentWordPos, currentWordEnd - currentWordPos);
 154         document.insertString(currentWordPos, newWord, null);
 155       */
 156       // Howard's Version for Ekit
 157                 Element element = ((javax.swing.text.html.HTMLDocument)document).getCharacterElement(currentWordPos);
 158                 AttributeSet attribs = element.getAttributes();
 159         document.remove(currentWordPos, currentWordEnd - currentWordPos);
 160         document.insertString(currentWordPos, newWord, attribs);
 161       // End Howard's Version
 162         //Need to reset the segment
 163         document.getText(0, document.getLength(), text);
 164       } catch (BadLocationException ex) {
 165         throw new RuntimeException(ex.getMessage());
 166       }
 167       //Position after the newly replaced word(s)
 168       //Position after the newly replaced word(s)
 169       first = true;
 170       currentWordPos = getNextWordStart(text, currentWordPos+newWord.length());
 171       if (currentWordPos != -1) {
 172         currentWordEnd = getNextWordEnd(text, currentWordPos);
 173         nextWordPos = getNextWordStart(text, currentWordEnd);
 174         sentanceIterator.setText(text);
 175         sentanceIterator.following(currentWordPos);
 176       } else moreTokens = false;
 177     }
 178   }
 179
 180   /** Returns the current text that is being tokenized (includes any changes
 181    *  that have been made)
 182    */
 183   public String getContext() {
 184     return text.toString();
 185   }
 186
 187   /** Returns true iif the current word is at the start of a sentance*/
 188   public boolean isNewSentance() {
 189     return startsSentance;
 190   }
 191
 192 }