001/* 002 * Copyright 2011 Atteo. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014package org.atteo.evo.inflector; 015 016/** 017 * Transforms English words from singular to plural form. 018 * <p> 019 * Examples: 020 * <pre> 021 * English.plural("word") = "words"; 022 * 023 * English.plural("cat", 1) = "cat"; 024 * English.plural("cat", 2) = "cats"; 025 * </pre> 026 * </p> 027 * <p> 028 * Based on <a href="http://www.csse.monash.edu.au/~damian/papers/HTML/Plurals.html"> 029 * An Algorithmic Approach to English Pluralization</a> by Damian Conway. 030 * </p> 031 */ 032public class English extends TwoFormInflector { 033 public static enum MODE { 034 ENGLISH_ANGLICIZED, ENGLISH_CLASSICAL 035 } 036 037 private static final String[] CATEGORY_EX_ICES = { "codex", "murex", 038 "silex", }; 039 040 private static final String[] CATEGORY_IX_ICES = { "radix", "helix", }; 041 042 private static final String[] CATEGORY_UM_A = { "bacterium", 043 "agendum", "desideratum", "erratum", "stratum", "datum", "ovum", 044 "extremum", "candelabrum", }; 045 046 // Always us -> i 047 private static final String[] CATEGORY_US_I = { "alumnus", "alveolus", 048 "bacillus", "bronchus", "locus", "nucleus", "stimulus", "meniscus", 049 "thesaurus", }; 050 051 private static final String[] CATEGORY_ON_A = { "criterion", 052 "perihelion", "aphelion", "phenomenon", "prolegomenon", "noumenon", 053 "organon", "asyndeton", "hyperbaton", }; 054 055 private static final String[] CATEGORY_A_AE = { "alumna", "alga", 056 "vertebra", "persona" }; 057 058 // Always o -> os 059 private static final String[] CATEGORY_O_OS = { "albino", 060 "archipelago", "armadillo", "commando", "crescendo", "fiasco", 061 "ditto", "dynamo", "embryo", "ghetto", "guano", "inferno", "jumbo", 062 "lumbago", "magneto", "manifesto", "medico", "octavo", "photo", 063 "pro", "quarto", "canto", "lingo", "generalissimo", "stylo", 064 "rhino", "casino", "auto", "macro", "zero", "todo" 065 }; 066 067 // Classical o -> i (normally -> os) 068 private static final String[] CATEGORY_O_I = { 069 "solo", "soprano", "basso", "alto", "contralto", "tempo", "piano", 070 "virtuoso", }; 071 072 private static final String[] CATEGORY_EN_INA = { 073 "stamen", "foramen", "lumen" 074 }; 075 076 // -a to -as (anglicized) or -ata (classical) 077 private static final String[] CATEGORY_A_ATA = { 078 "anathema", "enema", "oedema", "bema", "enigma", "sarcoma", 079 "carcinoma", "gumma", "schema", "charisma", "lemma", "soma", 080 "diploma", "lymphoma", "stigma", "dogma", "magma", "stoma", 081 "drama", "melisma", "trauma", "edema", "miasma" 082 }; 083 084 private static final String[] CATEGORY_IS_IDES = { 085 "iris", "clitoris" 086 }; 087 088 // -us to -uses (anglicized) or -us (classical) 089 private static final String[] CATEGORY_US_US = { 090 "apparatus", "impetus", "prospectus", "cantus", "nexus", "sinus", "coitus", 091 "plexus", "status", "hiatus" 092 }; 093 094 private static final String[] CATEGORY_NONE_I = { 095 "afreet", "afrit", "efreet" 096 }; 097 098 private static final String[] CATEGORY_NONE_IM = { 099 "cherub", "goy", "seraph" 100 }; 101 102 private static final String[] CATEGORY_EX_EXES = { 103 "apex", "latex", "vertex", "cortex", "pontifex", "vortex", "index", "simplex" 104 }; 105 106 private static final String[] CATEGORY_IX_IXES = { 107 "appendix" 108 }; 109 110 private static final String[] CATEGORY_S_ES = { 111 "acropolis", "chaos", "lens", "aegis", 112 "cosmos", "mantis", "alias", "dais", "marquis", "asbestos", 113 "digitalis", "metropolis", "atlas", "epidermis", "pathos", 114 "bathos", "ethos", "pelvis", "bias", "gas", "polis", "caddis", 115 "glottis", "rhinoceros", "cannabis", "glottis", "sassafras", 116 "canvas", "ibis", "trellis" 117 }; 118 119 private static final String[] CATEGORY_MAN_MANS = { 120 "human", "Alabaman", "Bahaman", "Burman", "German", "Hiroshiman", "Liman", "Nakayaman", "Oklahoman", 121 "Panaman", "Selman", "Sonaman", "Tacoman", "Yakiman", "Yokohaman", "Yuman" 122 }; 123 124 private static English inflector = new English(); 125 126 127 public English() { 128 this(MODE.ENGLISH_ANGLICIZED); 129 } 130 131 public English(MODE mode) { 132 133 uncountable(new String[] { 134 // 2. Handle words that do not inflect in the plural (such as fish, travois, chassis, nationalities ending 135 // endings 136 "fish", "ois", "sheep", "deer", "pox", "itis", 137 138 // words 139 "bison", "flounder", "pliers", "bream", 140 "gallows", "proceedings", "breeches", "graffiti", "rabies", 141 "britches", "headquarters", "salmon", "carp", "herpes", 142 "scissors", "chassis", "high-jinks", "sea-bass", "clippers", 143 "homework", "series", "cod", "innings", "shears", 144 "contretemps", "jackanapes", "species", "corps", "mackerel", 145 "swine", "debris", "measles", "trout", "diabetes", "mews", 146 "tuna", "djinn", "mumps", "whiting", "eland", "news", 147 "wildebeest", "elk", "pincers", "sugar" }); 148 149 // 4. Handle standard irregular plurals (mongooses, oxen, etc.) 150 151 irregular(new String[][] { 152 { "child", "children" }, // classical 153 { "ephemeris", "ephemerides" }, // classical 154 { "mongoose", "mongoose" }, // anglicized 155 { "mythos", "mythoi" }, // classical 156 // TODO: handle entire word correctly 157 //{ "ox", "oxen" }, // classical 158 { "soliloquy", "soliloquies" }, // anglicized 159 { "trilby", "trilbys" }, // anglicized 160 { "genus", "genera" }, // classical 161 { "quiz", "quizzes" }, 162 }); 163 164 if (mode == MODE.ENGLISH_ANGLICIZED) { 165 // Anglicized plural 166 irregular(new String[][] { 167 { "beef", "beefs" }, 168 { "brother", "brothers" }, 169 { "cow", "cows" }, 170 { "genie", "genies" }, 171 { "money", "moneys" }, 172 { "octopus", "octopuses" }, 173 { "opus", "opuses" }, 174 }); 175 } else if (mode == MODE.ENGLISH_CLASSICAL) { 176 // Classical plural 177 irregular(new String[][] { { "beef", "beeves"}, 178 { "brother", "brethren" }, 179 { "cow", "kine" }, { "genie", "genii"}, 180 { "money", "monies" }, 181 { "octopus", "octopodes" }, 182 { "opus", "opera" }, 183 }); 184 } 185 186 categoryRule(CATEGORY_MAN_MANS, "", "s"); 187 188 // questionable 189 /* 190 rule(new String[][] { 191 { "(ness)$", "$1" }, 192 { "(ality)$", "$1" } 193 { "(icity)$", "$1" }, 194 { "(ivity)$", "$1" }, 195 }); 196 */ 197 // 5. Handle irregular inflections for common suffixes 198 rule(new String[][] { 199 { "man$", "men" }, 200 { "([lm])ouse$", "$1ice" }, 201 { "tooth$", "teeth" }, 202 { "goose$", "geese" }, 203 { "foot$", "feet" }, 204 { "zoon$", "zoa" }, 205 { "([csx])is$", "$1es" }, 206 }); 207 208 // 6. Handle fully assimilated classical inflections 209 categoryRule(CATEGORY_EX_ICES, "ex", "ices"); 210 categoryRule(CATEGORY_IX_ICES, "ix", "ices"); 211 categoryRule(CATEGORY_UM_A, "um", "a"); 212 categoryRule(CATEGORY_ON_A, "on", "a"); 213 categoryRule(CATEGORY_A_AE, "a", "ae"); 214 215 // 7. Handle classical variants of modern inflections 216 if (mode == MODE.ENGLISH_CLASSICAL) { 217 rule(new String[][]{ 218 { "trix$", "trices" }, 219 { "eau$", "eaux" }, 220 { "ieu$", "ieux" }, 221 { "(..[iay])nx$", "$1nges" }, 222 }); 223 categoryRule(CATEGORY_EN_INA, "en", "ina"); 224 categoryRule(CATEGORY_A_ATA, "a", "ata"); 225 categoryRule(CATEGORY_IS_IDES, "is", "ides"); 226 categoryRule(CATEGORY_US_US, "", ""); 227 categoryRule(CATEGORY_O_I, "o", "i"); 228 categoryRule(CATEGORY_NONE_I, "", "i"); 229 categoryRule(CATEGORY_NONE_IM, "", "im"); 230 categoryRule(CATEGORY_EX_EXES, "ex", "ices"); 231 categoryRule(CATEGORY_IX_IXES, "ix", "ices"); 232 } 233 234 categoryRule(CATEGORY_US_I, "us", "i"); 235 236 rule("([cs]h|[zx])$", "$1es"); 237 categoryRule(CATEGORY_S_ES, "", "es"); 238 categoryRule(CATEGORY_IS_IDES, "", "es"); 239 categoryRule(CATEGORY_US_US, "", "es"); 240 rule("(us)$", "$1es"); 241 categoryRule(CATEGORY_A_ATA, "", "s"); 242 243 // The suffixes -ch, -sh, and -ss all take -es in the plural (churches, 244 // classes, etc)... 245 rule(new String[][] { { "([cs])h$", "$1hes" }, { "ss$", "sses" } }); 246 247 // Certain words ending in -f or -fe take -ves in the plural (lives, 248 // wolves, etc)... 249 rule(new String[][] { 250 { "([aeo]l)f$", "$1ves" }, 251 { "([^d]ea)f$", "$1ves" }, 252 { "(ar)f$", "$1ves" }, 253 { "([nlw]i)fe$", "$1ves" } 254 }); 255 256 // Words ending in -y take -ys 257 rule(new String[][] { { "([aeiou]y)$", "$1s" }, { "y$", "ies" }, }); 258 259 // Some words ending in -o take -os (including does preceded by a vowel) 260 categoryRule(CATEGORY_O_I, "o", "os"); 261 categoryRule(CATEGORY_O_OS, "o", "os"); 262 rule("([aeiou]o)$", "$1s"); 263 // The rest take -oes 264 rule("(o)$", "$1es"); 265 266 rule("(ul)um$", "$1a"); 267 268 categoryRule(CATEGORY_A_ATA, "", "es"); 269 270 rule("(s)$", "$1es"); 271 272 // Return empty string for empty string input 273 rule("^$", ""); 274 // Otherwise, assume that the plural just adds -s 275 rule("$", "s"); 276 } 277 278 /** 279 * Returns plural form of the given word. 280 * 281 * @param word word in singular form 282 * @return plural form of the word 283 */ 284 @Override 285 public String getPlural(String word) { 286 return super.getPlural(word); 287 } 288 289 /** 290 * Returns singular or plural form of the word based on count. 291 * 292 * @param word word in singular form 293 * @param count word count 294 * @return form of the word correct for given count 295 */ 296 public String getPlural(String word, int count) { 297 if (count == 1) { 298 return word; 299 } 300 return getPlural(word); 301 } 302 303 /** 304 * Returns plural form of the given word. 305 * <p> 306 * For instance: 307 * <pre> 308 * {@code 309 * English.plural("cat") == "cats"; 310 * } 311 * </pre> 312 * </p> 313 * @param word word in singular form 314 * @return plural form of given word 315 */ 316 public static String plural(String word) { 317 return inflector.getPlural(word); 318 } 319 320 /** 321 * Returns singular or plural form of the word based on count. 322 * <p> 323 * For instance: 324 * <pre> 325 * {@code 326 * English.plural("cat", 1) == "cat"; 327 * English.plural("cat", 2) == "cats"; 328 * } 329 * </pre> 330 * </p> 331 * @param word word in singular form 332 * @param count word count 333 * @return form of the word correct for given count 334 */ 335 public static String plural(String word, int count) { 336 return inflector.getPlural(word, count); 337 } 338 339 public static void setMode(MODE mode) { 340 English newInflector = new English(mode); 341 inflector = newInflector; 342 } 343}