[old-projects.git] / ekit / com / swabunga / spell / event / DocumentWordTokenizer.java

package com.swabunga.spell.event;\r\r
\r\r
import java.util.*;\r\r
import java.text.*;\r\r
import javax.swing.text.AttributeSet;\r\r
import javax.swing.text.Document;\r\r
import javax.swing.text.Element;\r\r
import javax.swing.text.Segment;\r\r
import javax.swing.text.BadLocationException;\r\r
\r\r
/** This class tokenizes a swing document model. It also allows for the\r\r
 *  document model to be changed when corrections occur.\r\r
 *\r\r
 * @author Jason Height (jheight@chariot.net.au)\r\r
 */\r\r
public class DocumentWordTokenizer implements WordTokenizer {\r\r
  /** Holds the start character position of the current word*/\r\r
  private int currentWordPos = 0;\r\r
  /** Holds the end character position of the current word*/\r\r
  private int currentWordEnd = 0;\r\r
  /** Holds the start character position of the next word*/\r\r
  private int nextWordPos = -1;\r\r
  /** The actual text that is being tokenized*/\r\r
  private Document document;\r\r
  /** The character iterator over the document*/\r\r
  private Segment text;\r\r
  /** The cumulative word count that have been processed*/\r\r
  private int wordCount = 0;\r\r
  /** Flag indicating if there are any more tokens (words) left*/\r\r
  private boolean moreTokens = true;\r\r
  /** Is this a special case where the currentWordStart, currntWordEnd and\r\r
   *  nextWordPos have already been calculated. (see nextWord)\r\r
   */\r\r
  private boolean first = true;\r\r
\r\r
  private BreakIterator sentanceIterator;\r\r
  private boolean startsSentance = true;\r\r
\r\r
\r\r
  public DocumentWordTokenizer(Document document) {\r\r
    this.document = document;\r\r
    //Create a text segment over the etire document\r\r
    text = new Segment();\r\r
    sentanceIterator = BreakIterator.getSentenceInstance();\r\r
    try {\r\r
      document.getText(0, document.getLength(), text);\r\r
      sentanceIterator.setText(text);\r\r
      currentWordPos = getNextWordStart(text, 0);\r\r
      //If the current word pos is -1 then the string was all white space\r\r
      if (currentWordPos != -1) {\r\r
        currentWordEnd = getNextWordEnd(text, currentWordPos);\r\r
        nextWordPos = getNextWordStart(text, currentWordEnd);\r\r
      } else {\r\r
        moreTokens = false;\r\r
      }\r\r
    } catch (BadLocationException ex) {\r\r
      moreTokens = false;\r\r
    }\r\r
  }\r\r
\r\r
  /** This helper method will return the start character of the next\r\r
   * word in the buffer from the start position\r\r
   */\r\r
  private static int getNextWordStart(Segment text, int startPos) {\r\r
    if (startPos <= text.getEndIndex())\r\r
      for (char ch = text.setIndex(startPos);ch != Segment.DONE;ch = text.next()) {\r\r
        if (Character.isLetterOrDigit(ch)) {\r\r
          return text.getIndex();\r\r
        }\r\r
      }\r\r
    return -1;\r\r
  }\r\r
\r\r
  /** This helper method will return the end of the next word in the buffer.\r\r
   *\r\r
   */\r\r
  private static int getNextWordEnd(Segment text, int startPos) {\r\r
    for (char ch = text.setIndex(startPos); ch != Segment.DONE;ch = text.next()) {\r\r
      if (!Character.isLetterOrDigit(ch)) {\r\r
        return text.getIndex();\r\r
      }\r\r
    }\r\r
    return text.getEndIndex();\r\r
  }\r\r
\r\r
\r\r
  /** Returns true if there are more words that can be processed in the string\r\r
   *\r\r
   */\r\r
  public boolean hasMoreWords() {\r\r
    return moreTokens;\r\r
  }\r\r
\r\r
  /** Returns the current character position in the text\r\r
   *\r\r
   */\r\r
  public int getCurrentWordPosition() {\r\r
    return currentWordPos;\r\r
  }\r\r
\r\r
  /** Returns the current end word position in the text\r\r
   *\r\r
   */\r\r
  public int getCurrentWordEnd() {\r\r
    return currentWordEnd;\r\r
  }\r\r
\r\r
\r\r
  /** Returns the next word in the text\r\r
   *\r\r
   */\r\r
  public String nextWord() {\r\r
    if (!first) {\r\r
      currentWordPos = nextWordPos;\r\r
      currentWordEnd = getNextWordEnd(text, currentWordPos);\r\r
      nextWordPos = getNextWordStart(text, currentWordEnd+1);\r\r
      int current = sentanceIterator.current();\r\r
      if (current == currentWordPos)\r\r
        startsSentance = true;\r\r
      else {\r\r
        startsSentance = false;\r\r
        if (currentWordEnd > current)\r\r
          sentanceIterator.next();\r\r
      }\r\r
\r\r
    }\r\r
    //The nextWordPos has already been populated\r\r
    String word = null;\r\r
    try {\r\r
      word = document.getText(currentWordPos, currentWordEnd-currentWordPos);\r\r
    } catch (BadLocationException ex) {\r\r
      moreTokens = false;\r\r
    }\r\r
    wordCount++;\r\r
    first = false;\r\r
    if (nextWordPos == -1)\r\r
      moreTokens = false;\r\r
    return word;\r\r
  }\r\r
\r\r
  /** Returns the current number of words that have been processed\r\r
   *\r\r
   */\r\r
  public int getCurrentWordCount() {\r\r
    return wordCount;\r\r
  }\r\r
\r\r
  /** Replaces the current word token*/\r\r
  public void replaceWord(String newWord) {\r\r
    if (currentWordPos != -1) {\r\r
      try {\r\r
      /* ORIGINAL\r\r
        document.remove(currentWordPos, currentWordEnd - currentWordPos);\r\r
        document.insertString(currentWordPos, newWord, null);\r\r
      */\r\r
      // Howard's Version for Ekit\r\r
		Element	element = ((javax.swing.text.html.HTMLDocument)document).getCharacterElement(currentWordPos);\r\r
		AttributeSet attribs = element.getAttributes();\r\r
        document.remove(currentWordPos, currentWordEnd - currentWordPos);\r\r
        document.insertString(currentWordPos, newWord, attribs);\r\r
      // End Howard's Version\r\r
        //Need to reset the segment\r\r
        document.getText(0, document.getLength(), text);\r\r
      } catch (BadLocationException ex) {\r\r
        throw new RuntimeException(ex.getMessage());\r\r
      }\r\r
      //Position after the newly replaced word(s)\r\r
      //Position after the newly replaced word(s)\r\r
      first = true;\r\r
      currentWordPos = getNextWordStart(text, currentWordPos+newWord.length());\r\r
      if (currentWordPos != -1) {\r\r
        currentWordEnd = getNextWordEnd(text, currentWordPos);\r\r
        nextWordPos = getNextWordStart(text, currentWordEnd);\r\r
        sentanceIterator.setText(text);\r\r
        sentanceIterator.following(currentWordPos);\r\r
      } else moreTokens = false;\r\r
    }\r\r
  }\r\r
\r\r
  /** Returns the current text that is being tokenized (includes any changes\r\r
   *  that have been made)\r\r
   */\r\r
  public String getContext() {\r\r
    return text.toString();\r\r
  }\r\r
\r\r
  /** Returns true iif the current word is at the start of a sentance*/\r\r
  public boolean isNewSentance() {\r\r
    return startsSentance;\r\r
  }\r\r
\r\r
}
Commit	Line	Data
	1	package com.swabunga.spell.event;\r\r
	2	\r\r
	3	import java.util.*;\r\r
	4	import java.text.*;\r\r
	5	import javax.swing.text.AttributeSet;\r\r
	6	import javax.swing.text.Document;\r\r
	7	import javax.swing.text.Element;\r\r
	8	import javax.swing.text.Segment;\r\r
	9	import javax.swing.text.BadLocationException;\r\r
	10	\r\r
	11	/** This class tokenizes a swing document model. It also allows for the\r\r
	12	* document model to be changed when corrections occur.\r\r
	13	*\r\r
	14	* @author Jason Height (jheight@chariot.net.au)\r\r
	15	*/\r\r
	16	public class DocumentWordTokenizer implements WordTokenizer {\r\r
	17	/** Holds the start character position of the current word*/\r\r
	18	private int currentWordPos = 0;\r\r
	19	/** Holds the end character position of the current word*/\r\r
	20	private int currentWordEnd = 0;\r\r
	21	/** Holds the start character position of the next word*/\r\r
	22	private int nextWordPos = -1;\r\r
	23	/** The actual text that is being tokenized*/\r\r
	24	private Document document;\r\r
	25	/** The character iterator over the document*/\r\r
	26	private Segment text;\r\r
	27	/** The cumulative word count that have been processed*/\r\r
	28	private int wordCount = 0;\r\r
	29	/** Flag indicating if there are any more tokens (words) left*/\r\r
	30	private boolean moreTokens = true;\r\r
	31	/** Is this a special case where the currentWordStart, currntWordEnd and\r\r
	32	* nextWordPos have already been calculated. (see nextWord)\r\r
	33	*/\r\r
	34	private boolean first = true;\r\r
	35	\r\r
	36	private BreakIterator sentanceIterator;\r\r
	37	private boolean startsSentance = true;\r\r
	38	\r\r
	39	\r\r
	40	public DocumentWordTokenizer(Document document) {\r\r
	41	this.document = document;\r\r
	42	//Create a text segment over the etire document\r\r
	43	text = new Segment();\r\r
	44	sentanceIterator = BreakIterator.getSentenceInstance();\r\r
	45	try {\r\r
	46	document.getText(0, document.getLength(), text);\r\r
	47	sentanceIterator.setText(text);\r\r
	48	currentWordPos = getNextWordStart(text, 0);\r\r
	49	//If the current word pos is -1 then the string was all white space\r\r
	50	if (currentWordPos != -1) {\r\r
	51	currentWordEnd = getNextWordEnd(text, currentWordPos);\r\r
	52	nextWordPos = getNextWordStart(text, currentWordEnd);\r\r
	53	} else {\r\r
	54	moreTokens = false;\r\r
	55	}\r\r
	56	} catch (BadLocationException ex) {\r\r
	57	moreTokens = false;\r\r
	58	}\r\r
	59	}\r\r
	60	\r\r
	61	/** This helper method will return the start character of the next\r\r
	62	* word in the buffer from the start position\r\r
	63	*/\r\r
	64	private static int getNextWordStart(Segment text, int startPos) {\r\r
	65	if (startPos <= text.getEndIndex())\r\r
	66	for (char ch = text.setIndex(startPos);ch != Segment.DONE;ch = text.next()) {\r\r
	67	if (Character.isLetterOrDigit(ch)) {\r\r
	68	return text.getIndex();\r\r
	69	}\r\r
	70	}\r\r
	71	return -1;\r\r
	72	}\r\r
	73	\r\r
	74	/** This helper method will return the end of the next word in the buffer.\r\r
	75	*\r\r
	76	*/\r\r
	77	private static int getNextWordEnd(Segment text, int startPos) {\r\r
	78	for (char ch = text.setIndex(startPos); ch != Segment.DONE;ch = text.next()) {\r\r
	79	if (!Character.isLetterOrDigit(ch)) {\r\r
	80	return text.getIndex();\r\r
	81	}\r\r
	82	}\r\r
	83	return text.getEndIndex();\r\r
	84	}\r\r
	85	\r\r
	86	\r\r
	87	/** Returns true if there are more words that can be processed in the string\r\r
	88	*\r\r
	89	*/\r\r
	90	public boolean hasMoreWords() {\r\r
	91	return moreTokens;\r\r
	92	}\r\r
	93	\r\r
	94	/** Returns the current character position in the text\r\r
	95	*\r\r
	96	*/\r\r
	97	public int getCurrentWordPosition() {\r\r
	98	return currentWordPos;\r\r
	99	}\r\r
	100	\r\r
	101	/** Returns the current end word position in the text\r\r
	102	*\r\r
	103	*/\r\r
	104	public int getCurrentWordEnd() {\r\r
	105	return currentWordEnd;\r\r
	106	}\r\r
	107	\r\r
	108	\r\r
	109	/** Returns the next word in the text\r\r
	110	*\r\r
	111	*/\r\r
	112	public String nextWord() {\r\r
	113	if (!first) {\r\r
	114	currentWordPos = nextWordPos;\r\r
	115	currentWordEnd = getNextWordEnd(text, currentWordPos);\r\r
	116	nextWordPos = getNextWordStart(text, currentWordEnd+1);\r\r
	117	int current = sentanceIterator.current();\r\r
	118	if (current == currentWordPos)\r\r
	119	startsSentance = true;\r\r
	120	else {\r\r
	121	startsSentance = false;\r\r
	122	if (currentWordEnd > current)\r\r
	123	sentanceIterator.next();\r\r
	124	}\r\r
	125	\r\r
	126	}\r\r
	127	//The nextWordPos has already been populated\r\r
	128	String word = null;\r\r
	129	try {\r\r
	130	word = document.getText(currentWordPos, currentWordEnd-currentWordPos);\r\r
	131	} catch (BadLocationException ex) {\r\r
	132	moreTokens = false;\r\r
	133	}\r\r
	134	wordCount++;\r\r
	135	first = false;\r\r
	136	if (nextWordPos == -1)\r\r
	137	moreTokens = false;\r\r
	138	return word;\r\r
	139	}\r\r
	140	\r\r
	141	/** Returns the current number of words that have been processed\r\r
	142	*\r\r
	143	*/\r\r
	144	public int getCurrentWordCount() {\r\r
	145	return wordCount;\r\r
	146	}\r\r
	147	\r\r
	148	/** Replaces the current word token*/\r\r
	149	public void replaceWord(String newWord) {\r\r
	150	if (currentWordPos != -1) {\r\r
	151	try {\r\r
	152	/* ORIGINAL\r\r
	153	document.remove(currentWordPos, currentWordEnd - currentWordPos);\r\r
	154	document.insertString(currentWordPos, newWord, null);\r\r
	155	*/\r\r
	156	// Howard's Version for Ekit\r\r
	157	Element element = ((javax.swing.text.html.HTMLDocument)document).getCharacterElement(currentWordPos);\r\r
	158	AttributeSet attribs = element.getAttributes();\r\r
	159	document.remove(currentWordPos, currentWordEnd - currentWordPos);\r\r
	160	document.insertString(currentWordPos, newWord, attribs);\r\r
	161	// End Howard's Version\r\r
	162	//Need to reset the segment\r\r
	163	document.getText(0, document.getLength(), text);\r\r
	164	} catch (BadLocationException ex) {\r\r
	165	throw new RuntimeException(ex.getMessage());\r\r
	166	}\r\r
	167	//Position after the newly replaced word(s)\r\r
	168	//Position after the newly replaced word(s)\r\r
	169	first = true;\r\r
	170	currentWordPos = getNextWordStart(text, currentWordPos+newWord.length());\r\r
	171	if (currentWordPos != -1) {\r\r
	172	currentWordEnd = getNextWordEnd(text, currentWordPos);\r\r
	173	nextWordPos = getNextWordStart(text, currentWordEnd);\r\r
	174	sentanceIterator.setText(text);\r\r
	175	sentanceIterator.following(currentWordPos);\r\r
	176	} else moreTokens = false;\r\r
	177	}\r\r
	178	}\r\r
	179	\r\r
	180	/** Returns the current text that is being tokenized (includes any changes\r\r
	181	* that have been made)\r\r
	182	*/\r\r
	183	public String getContext() {\r\r
	184	return text.toString();\r\r
	185	}\r\r
	186	\r\r
	187	/** Returns true iif the current word is at the start of a sentance*/\r\r
	188	public boolean isNewSentance() {\r\r
	189	return startsSentance;\r\r
	190	}\r\r
	191	\r\r
	192	}