1 // Porter stemmer in Javascript. Few comments, but it's easy to follow against the rules in the original
4 // Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
7 // see also http://www.tartarus.org/~martin/PorterStemmer
9 // Release 1 by 'andargor', Jul 2004
10 // Release 2 (substantially revised) by Christopher McKenzie, Aug 2009
13 var stemmer = (function(){
48 c = "[^aeiou]", // consonant
49 v = "[aeiouy]", // vowel
50 C = c + "[^aeiouy]*", // consonant sequence
51 V = v + "[aeiou]*", // vowel sequence
53 mgr0 = "^(" + C + ")?" + V + C, // [C]VC... is m>0
54 meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$", // [C]VC[V] is m=1
55 mgr1 = "^(" + C + ")?" + V + C + V + C, // [C]VCVC... is m>1
56 s_v = "^(" + C + ")?" + v; // vowel in stem
68 if (w.length < 3) { return w; }
70 firstch = w.substr(0,1);
72 w = firstch.toUpperCase() + w.substr(1);
76 re = /^(.+?)(ss|i)es$/;
77 re2 = /^(.+?)([^s])s$/;
79 if (re.test(w)) { w = w.replace(re,"$1$2"); }
80 else if (re2.test(w)) { w = w.replace(re2,"$1$2"); }
84 re2 = /^(.+?)(ed|ing)$/;
87 re = new RegExp(mgr0);
92 } else if (re2.test(w)) {
95 re2 = new RegExp(s_v);
99 re3 = new RegExp("([^aeiouylsz])\\1$");
100 re4 = new RegExp("^" + C + v + "[^aeiouwxy]$");
101 if (re2.test(w)) { w = w + "e"; }
102 else if (re3.test(w)) { re = /.$/; w = w.replace(re,""); }
103 else if (re4.test(w)) { w = w + "e"; }
112 re = new RegExp(s_v);
113 if (re.test(stem)) { w = stem + "i"; }
117 re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
122 re = new RegExp(mgr0);
124 w = stem + step2list[suffix];
129 re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
134 re = new RegExp(mgr0);
136 w = stem + step3list[suffix];
141 re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
142 re2 = /^(.+?)(s|t)(ion)$/;
146 re = new RegExp(mgr1);
150 } else if (re2.test(w)) {
151 var fp = re2.exec(w);
152 stem = fp[1] + fp[2];
153 re2 = new RegExp(mgr1);
154 if (re2.test(stem)) {
164 re = new RegExp(mgr1);
165 re2 = new RegExp(meq1);
166 re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
167 if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) {
173 re2 = new RegExp(mgr1);
174 if (re.test(w) && re2.test(w)) {
176 w = w.replace(re,"");
179 // and turn initial Y back to y
181 if (firstch == "y") {
182 w = firstch.toLowerCase() + w.substr(1);