1 /* This class is based on Lawrence Phillips' original C++ code for this class.
2 * Found here: http://aspell.sourceforge.net/metaphone/
7 package com
.swabunga
.spell
.engine
;
13 * Things that were changed:
14 * The alternate flag could be set to true but was never checked so why bother with it. REMOVED
15 * Why was this class serializable?
16 * The primary, in, length and last variables could be initialized and local to the
17 * process method and references passed around the appropriate methods. As such there are
18 * no class variables and this class becomes firstly thread-safe and secondly could be static final.
19 * The function SlavoGermanic was called repeatedly in the process function; it is now called only once.
22 class DoubleMeta
implements Transformator
{
24 private static final String
[] myList
= {
25 "GN", "KN", "PN", "WR", "PS", ""
27 private static final String
[] list1
= {
30 private static final String
[] list2
= {
31 "BACHER", "MACHER", ""
33 private static final String
[] list3
= {
36 private static final String
[] list4
= {
39 private static final String
[] list5
= {
42 private static final String
[] list6
= {
45 private static final String
[] list7
= {
48 private static final String
[] list8
= {
49 "HOR", "HYM", "HIA", "HEM", ""
51 private static final String
[] list9
= {
54 private static final String
[] list10
= {
57 private static final String
[] list11
= {
60 private static final String
[] list12
= {
61 "ORCHES", "ARCHIT", "ORCHID", ""
63 private static final String
[] list13
= {
66 private static final String
[] list14
= {
67 "A", "O", "U", "E", ""
69 private static final String
[] list15
= {
70 "L", "R", "N", "M", "B", "H", "F", "V", "W", " ", ""
72 private static final String
[] list16
= {
75 private static final String
[] list17
= {
78 private static final String
[] list18
= {
81 private static final String
[] list19
= {
84 private static final String
[] list20
= {
87 private static final String
[] list21
= {
90 private static final String
[] list22
= {
93 private static final String
[] list23
= {
96 private static final String
[] list24
= {
99 private static final String
[] list25
= {
102 private static final String
[] list26
= {
103 "CIO", "CIE", "CIA", ""
105 private static final String
[] list27
= {
108 private static final String
[] list28
= {
111 private static final String
[] list29
= {
114 private static final String
[] list30
= {
117 private static final String
[] list31
= {
120 private static final String
[] list32
= {
123 private static final String
[] list33
= {
126 private static final String
[] list34
= {
129 private static final String
[] list35
= {
132 private static final String
[] list36
= {
133 "C", "G", "L", "R", "T", ""
135 private static final String
[] list37
= {
138 private static final String
[] list38
= {
141 private static final String
[] list39
= {
142 "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER",
145 private static final String
[] list40
= {
148 private static final String
[] list41
= {
149 "DANGER", "RANGER", "MANGER", ""
151 private static final String
[] list42
= {
154 private static final String
[] list43
= {
157 private static final String
[] list44
= {
160 private static final String
[] list45
= {
163 private static final String
[] list46
= {
166 private static final String
[] list47
= {
169 private static final String
[] list48
= {
172 private static final String
[] list49
= {
175 private static final String
[] list50
= {
178 private static final String
[] list51
= {
181 private static final String
[] list52
= {
184 private static final String
[] list53
= {
187 private static final String
[] list54
= {
188 "L", "T", "K", "S", "N", "M", "B", "Z", ""
190 private static final String
[] list55
= {
193 private static final String
[] list56
= {
194 "ILLO", "ILLA", "ALLE", ""
196 private static final String
[] list57
= {
199 private static final String
[] list58
= {
202 private static final String
[] list59
= {
205 private static final String
[] list60
= {
208 private static final String
[] list61
= {
211 private static final String
[] list62
= {
214 private static final String
[] list63
= {
217 private static final String
[] list64
= {
220 private static final String
[] list65
= {
223 private static final String
[] list66
= {
226 private static final String
[] list67
= {
229 private static final String
[] list68
= {
230 "HEIM", "HOEK", "HOLM", "HOLZ", ""
232 private static final String
[] list69
= {
235 private static final String
[] list70
= {
238 private static final String
[] list71
= {
239 "M", "N", "L", "W", ""
241 private static final String
[] list72
= {
244 private static final String
[] list73
= {
247 private static final String
[] list74
= {
250 private static final String
[] list75
= {
251 "OO", "ER", "EN", "UY", "ED", "EM", ""
253 private static final String
[] list76
= {
256 private static final String
[] list77
= {
259 private static final String
[] list78
= {
262 private static final String
[] list79
= {
265 private static final String
[] list80
= {
268 private static final String
[] list81
= {
271 private static final String
[] list82
= {
274 private static final String
[] list83
= {
277 private static final String
[] list84
= {
280 private static final String
[] list85
= {
283 private static final String
[] list86
= {
286 private static final String
[] list87
= {
289 private static final String
[] list88
= {
292 private static final String
[] list89
= {
295 private static final String
[] list90
= {
296 "EWSKI", "EWSKY", "OWSKI", "OWSKY", ""
298 private static final String
[] list91
= {
301 private static final String
[] list92
= {
304 private static final String
[] list93
= {
307 private static final String
[] list94
= {
310 private static final String
[] list95
= {
313 private static final String
[] list96
= {
318 * Checks whether the input contains any of the substrings "W", "K", "CZ" or "WITZ".
321 private final static boolean SlavoGermanic (String
in) {
322 if ((in.indexOf("W") > -1) || (in.indexOf("K") > -1) || (in.indexOf("CZ") > -1)
323 || (in.indexOf("WITZ") > -1))
329 * Appends the given code (main) to the primary buffer.
332 private final static void MetaphAdd (StringBuffer primary
, String main
) {
334 primary
.append(main
);
338 private final static void MetaphAdd (StringBuffer primary
, char main
) {
339 primary
.append(main
);
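343 * Compares the character at position at of in against the vowel letters ('A', 'E', 'I', 'O', ...); positions outside [0, length) are bounds-checked first.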
347 private final static boolean isVowel (String
in, int at
, int length
) {
348 if ((at
< 0) || (at
>= length
))
350 char it
= in.charAt(at
);
351 if ((it
== 'A') || (it
== 'E') || (it
== 'I') || (it
== 'O') || (it
==
358 * Checks whether the substring of string that starts at start and spans length characters equals any entry of list.
365 private final static boolean stringAt (String string
, int start
, int length
, String
[] list
) {
366 if ((start
< 0) || (start
>= string
.length()) || list
.length
== 0 )
368 String substr
= string
.substring(start
, start
+ length
);
369 for (int i
= 0; i
< list
.length
; i
++) {
370 if (list
[i
].equals(substr
))
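377 * Builds the phonetic code for a word: the word is upper-cased, scanned character by character, and the accumulated code is returned as a string.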
378 * @param word The word to process.
381 public final String
transform (String word
) {
382 StringBuffer primary
= new StringBuffer( word
.length() + 5 );
383 String
in = word
.toUpperCase() + " ";
385 int length
= in.length();
388 int last
= length
- 1;
389 boolean isSlavoGermaic
= SlavoGermanic(in);
390 if (stringAt(in, 0, 2, myList
))
392 if (in.charAt(0) == 'X') {
393 MetaphAdd(primary
, 'S');
396 while (current
< length
) {
397 switch (in.charAt(current
)) {
398 case 'A':case 'E':case 'I':case 'O':case 'U':case 'Y':
400 MetaphAdd(primary
, 'A');
404 MetaphAdd(primary
, 'P');
405 if (in.charAt(current
+ 1) == 'B')
411 MetaphAdd(primary
, 'S');
415 if ((current
> 1) && !isVowel(in, current
- 2, length
) && stringAt(in, (current
416 - 1), 3, list1
) && (in.charAt(current
+ 2) != 'I') && (in.charAt(
417 current
+ 2) != 'E') || stringAt(in, (current
- 2), 6, list2
)) {
418 MetaphAdd(primary
, 'K');
422 if ((current
== 0) && stringAt(in, current
, 6, list3
)) {
423 MetaphAdd(primary
, 'S');
427 if (stringAt(in, current
, 4, list4
)) {
428 MetaphAdd(primary
, 'K');
432 if (stringAt(in, current
, 2, list5
)) {
433 if ((current
> 0) && stringAt(in, current
, 4, list6
)) {
434 MetaphAdd(primary
, 'K' );
438 if ((current
== 0) && stringAt(in, (current
+ 1), 5, list7
) ||
439 stringAt(in, current
+ 1, 3, list8
) && !stringAt(in, 0, 5,
441 MetaphAdd(primary
, 'K' );
445 if (stringAt(in, 0, 4, list10
) || stringAt(in, 0, 3, list11
) ||
446 stringAt(in, current
- 2, 6, list12
) || stringAt(in, current
447 + 2, 1, list13
) || (stringAt(in, current
- 1, 1, list14
) ||
448 (current
== 0)) && stringAt(in, current
+ 2, 1, list15
)) {
449 MetaphAdd(primary
, 'K');
453 if (stringAt(in, 0, 2, list16
))
454 MetaphAdd(primary
, 'K');
456 MetaphAdd(primary
, 'X');
459 MetaphAdd(primary
, 'X');
465 if (stringAt(in, current
, 2, list17
) && !stringAt(in, current
, 4,
467 MetaphAdd(primary
, 'S');
471 if (stringAt(in, current
, 2, list19
)) {
472 MetaphAdd(primary
, 'X');
476 if (stringAt(in, current
, 2, list20
) && !((current
== 1) && in.charAt(0)
478 if (stringAt(in, current
+ 2, 1, list21
) && !stringAt(in, current
480 if (((current
== 1) && (in.charAt(current
- 1) == 'A')) || stringAt(in,
481 (current
- 1), 5, list23
))
482 MetaphAdd(primary
, "KS");
484 MetaphAdd(primary
, 'X');
489 MetaphAdd(primary
, 'K');
494 if (stringAt(in, current
, 2, list24
)) {
495 MetaphAdd(primary
, 'K');
499 else if (stringAt(in, current
, 2, list25
)) {
500 MetaphAdd(primary
, 'S');
505 MetaphAdd(primary
, 'K');
506 if (stringAt(in, current
+ 1, 2, list27
))
508 else if (stringAt(in, current
+ 1, 1, list28
) && !stringAt(in, current
515 if (stringAt(in, current
, 2, list30
)) {
516 if (stringAt(in, current
+ 2, 1, list31
)) {
517 MetaphAdd(primary
, 'J');
522 MetaphAdd(primary
, "TK");
527 MetaphAdd(primary
, 'T');
528 if (stringAt(in, current
, 2, list32
)) {
535 if (in.charAt(current
+ 1) == 'F')
539 MetaphAdd(primary
, 'F');
542 if (in.charAt(current
+ 1) == 'H') {
543 if ((current
> 0) && !isVowel(in, current
- 1, length
)) {
544 MetaphAdd(primary
, 'K');
550 if (in.charAt(current
+ 2) == 'I')
551 MetaphAdd(primary
, 'J');
553 MetaphAdd(primary
, 'K');
558 if ((current
> 1) && stringAt(in, current
- 2, 1, list33
) || ((current
> 2)
559 && stringAt(in, current
- 3, 1, list34
)) || ((current
> 3) &&
560 stringAt(in, current
- 4, 1, list35
))) {
565 if ((current
> 2) && (in.charAt(current
- 1) == 'U') && stringAt(in,
566 current
- 3, 1, list36
)) {
567 MetaphAdd(primary
, 'F');
570 if ((current
> 0) && (in.charAt(current
- 1) != 'I'))
571 MetaphAdd(primary
, 'K');
577 if (in.charAt(current
+ 1) == 'N') {
578 if ((current
== 1) && isVowel(in, 0, length
) && !isSlavoGermaic
) {
579 MetaphAdd(primary
, "KN");
582 if (!stringAt(in, current
+ 2, 2, list37
) && (in.charAt(current
583 + 1) != 'Y') && !isSlavoGermaic
) {
584 MetaphAdd(primary
, "N");
587 MetaphAdd(primary
, "KN");
593 if (stringAt(in, current
+ 1, 2, list38
) && !isSlavoGermaic
) {
594 MetaphAdd(primary
, "KL");
598 if ((current
== 0) && ((in.charAt(current
+ 1) == 'Y') || stringAt(in,
599 current
+ 1, 2, list39
))) {
600 MetaphAdd(primary
, 'K');
604 if ((stringAt(in, current
+ 1, 2, list40
) || (in.charAt(current
+
605 1) == 'Y')) && !stringAt(in, 0, 6, list41
) && !stringAt(in, current
606 - 1, 1, list42
) && !stringAt(in, current
- 1, 3, list43
)) {
607 MetaphAdd(primary
, 'K');
611 if (stringAt(in, current
+ 1, 1, list44
) || stringAt(in, current
-
613 if (stringAt(in, 0, 4, list46
) || stringAt(in, 0, 3, list47
) ||
614 stringAt(in, current
+ 1, 2, list48
)) {
615 MetaphAdd(primary
, 'K');
618 MetaphAdd(primary
, 'J');
623 if (in.charAt(current
+ 1) == 'G')
627 MetaphAdd(primary
, 'K');
630 if (((current
== 0) || isVowel(in, current
- 1, length
)) && isVowel(in, current
+
632 MetaphAdd(primary
, 'H');
640 if (stringAt(in, current
, 4, list50
) || stringAt(in, 0, 4, list51
)) {
641 if ((current
== 0) && (in.charAt(current
+ 4) == ' ') || stringAt(in,
643 MetaphAdd(primary
, 'H');
646 MetaphAdd(primary
, 'J');
651 if ((current
== 0) && !stringAt(in, current
, 4, list53
)) {
652 MetaphAdd(primary
, 'J');
655 if (isVowel(in, current
- 1, length
) && !isSlavoGermaic
&& ((in.charAt(current
656 + 1) == 'A') || in.charAt(current
+ 1) == 'O')) {
657 MetaphAdd(primary
, 'J' );
660 if (current
== last
) {
661 MetaphAdd(primary
, 'J' );
664 if (!stringAt(in, current
+ 1, 1, list54
) && !stringAt(in,
665 current
- 1, 1, list55
)) {
666 MetaphAdd(primary
, 'J');
671 if (in.charAt(current
+ 1) == 'J')
677 if (in.charAt(current
+ 1) == 'K')
681 MetaphAdd(primary
, 'K');
684 if (in.charAt(current
+ 1) == 'L') {
685 if (((current
== (length
- 3)) && stringAt(in, current
- 1, 4,
686 list56
)) || ((stringAt(in, last
- 1, 2, list57
) || stringAt(in,
687 last
, 1, list58
)) && stringAt(in, current
- 1, 4, list59
))) {
688 MetaphAdd(primary
, 'L' );
696 MetaphAdd(primary
, 'L');
699 if ((stringAt(in, current
- 1, 3, list60
) && (((current
+ 1) == last
)
700 || stringAt(in, current
+ 2, 2, list61
))) || (in.charAt(current
705 MetaphAdd(primary
, 'M');
708 if (in.charAt(current
+ 1) == 'N')
712 MetaphAdd(primary
, 'N');
716 MetaphAdd(primary
, 'N');
719 if (in.charAt(current
+ 1) == 'N') {
720 MetaphAdd(primary
, 'F');
724 if (stringAt(in, current
+ 1, 1, list62
))
728 MetaphAdd(primary
, 'P');
731 if (in.charAt(current
+ 1) == 'Q')
735 MetaphAdd(primary
, 'K');
738 if ((current
== last
) && !isSlavoGermaic
&& stringAt(in, current
739 - 2, 2, list63
) && !stringAt(in, current
- 4, 2, list64
)) {
740 // MetaphAdd(primary, "");
742 MetaphAdd(primary
, 'R');
743 if (in.charAt(current
+ 1) == 'R')
749 if (stringAt(in, current
- 1, 3, list65
)) {
753 if ((current
== 0) && stringAt(in, current
, 5, list66
)) {
754 MetaphAdd(primary
, 'X');
758 if (stringAt(in, current
, 2, list67
)) {
759 if (stringAt(in, current
+ 1, 4, list68
))
760 MetaphAdd(primary
, 'S');
762 MetaphAdd(primary
, 'X');
766 if (stringAt(in, current
, 3, list69
) || stringAt(in, current
, 4,
768 MetaphAdd(primary
, 'S');
772 if (((current
== 0) && stringAt(in, current
+ 1, 1, list71
)) || stringAt(in,
773 current
+ 1, 1, list72
)) {
774 MetaphAdd(primary
, 'S');
775 if (stringAt(in, current
+ 1, 1, list73
))
781 if (stringAt(in, current
, 2, list74
)) {
782 if (in.charAt(current
+ 2) == 'H')
783 if (stringAt(in, current
+ 3, 2, list75
)) {
784 if (stringAt(in, current
+ 3, 2, list76
)) {
785 MetaphAdd(primary
, "X");
788 MetaphAdd(primary
, "SK");
794 MetaphAdd(primary
, 'X');
798 if (stringAt(in, current
+ 2, 1, list77
)) {
799 MetaphAdd(primary
, 'S');
803 MetaphAdd(primary
, "SK");
807 if ((current
== last
) && stringAt(in, current
- 2, 2, list78
)) {
808 //MetaphAdd(primary, "");
810 MetaphAdd(primary
, 'S');
811 if (stringAt(in, current
+ 1, 1, list79
))
817 if (stringAt(in, current
, 4, list80
)) {
818 MetaphAdd(primary
, 'X');
822 if (stringAt(in, current
, 3, list81
)) {
823 MetaphAdd(primary
, 'X');
827 if (stringAt(in, current
, 2, list82
) || stringAt(in, current
, 3,
829 if (stringAt(in, (current
+ 2), 2, list84
) || stringAt(in, 0, 4,
830 list85
) || stringAt(in, 0, 3, list86
)) {
831 MetaphAdd(primary
, 'T');
834 MetaphAdd(primary
, '0');
839 if (stringAt(in, current
+ 1, 1, list87
)) {
844 MetaphAdd(primary
, 'T');
847 if (in.charAt(current
+ 1) == 'V')
851 MetaphAdd(primary
, 'F');
854 if (stringAt(in, current
, 2, list88
)) {
855 MetaphAdd(primary
, 'R');
859 if ((current
== 0) && (isVowel(in, current
+ 1, length
) || stringAt(in, current
,
861 MetaphAdd(primary
, 'A');
863 if (((current
== last
) && isVowel(in, current
- 1, length
)) || stringAt(in, current
864 - 1, 5, list90
) || stringAt(in, 0, 3, list91
)) {
865 MetaphAdd(primary
, 'F');
869 if (stringAt(in, current
, 4, list92
)) {
870 MetaphAdd(primary
, "TS");
877 if (!((current
== last
) && (stringAt(in, current
- 3, 3, list93
) ||
878 stringAt(in, current
- 2, 2, list94
))))
879 MetaphAdd(primary
, "KS");
880 if (stringAt(in, current
+ 1, 1, list95
))
886 if (in.charAt(current
+ 1) == 'H') {
887 MetaphAdd(primary
, 'J');
892 MetaphAdd(primary
, 'S');
894 if (in.charAt(current
+ 1) == 'Z')
903 return primary
.toString();