Author Topic: Singularize and Pluralize sefuns...  (Read 7822 times)

Offline Tricky

  • BFF
  • ***
  • Posts: 189
  • I like what I code and I code what I like!
    • View Profile
Singularize and Pluralize sefuns...
« on: January 02, 2012, 02:38:19 PM »
I extended my singularize code to also do pluralize. It uses the same format as the singularize exception file and therefore is quite flexible.

Again, the format of the code has changed, but the configuration files have stayed the same.

License: See my sig below.

Code: (sefun code) [Select]
/* parser.c
 *
 * Tricky @ LPUniversity
 * 27-DEC-2011
 * simul_efun
 *
 */

/* This is from my global.h file.
 *
 * Portable modifier aliases. In this file the 'f' aliases are applied to
 * functions and the 'v' aliases to variables. Which keywords they expand to
 * depends on whether __SENSIBLE_MODIFIERS__ is defined.
 */
#ifdef __SENSIBLE_MODIFIERS__
#define privatef protected
#define privatev nosave

#define staticf protected
#define staticv nosave
#else
#define privatef static
#define privatev static

#define staticf static
#define staticv static

/* Just in case someone really wants to use these keywords directly:
 * map them onto the modifiers that are available in this branch.
 */
#define protected private
#define nosave    static
#endif

/* Where to find the exception file for singularize().
 * Placeholder path: change it to the real location of singularize.conf.
 */
#define S_EXCEPTIONS "/path/to/singularize.conf"

/* Where to find the exception file for pluralize().
 * Placeholder path: change it to the real location of pluralize.conf.
 */
#define P_EXCEPTIONS "/path/to/pluralize.conf"

/*
 * Built-in whole-word exceptions, keyed first by operation tag
 * ("singularize" / "pluralize") and then by the lower-cased word.
 *
 * Each value is ({ cut, suffix }):
 *   cut    - number of characters removed from the end of the word, or the
 *            string "*" meaning "replace the whole word with suffix".
 *   suffix - text appended after cutting.
 * ({ 0, "" }) therefore means the word is returned unchanged.
 *
 * The table for a tag is replaced wholesale when f_load_Exceptions()
 * loads that tag's configuration file; these entries are the fallback.
 */
privatev mapping Exceptions = ([
  /* plural -> singular */
  "singularize": ([
    "is": ({ "*", "are" }),
    "blues": ({ 0, "" }),
    "bonuses": ({ 2, "" }),
    "buses": ({ 2, "" }),
    "children": ({ 3, "" }),
    "dice": ({ 2, "e" }),
    "dies": ({ 1, "" }),
    "feet": ({ 3, "oot" }),
    "geese": ({ 4, "oose" }),
    "has": ({ 1, "ve" }),
    "indices": ({ 4, "ex" }),
    "lice": ({ 3, "ouse" }),
    "lotuses": ({ 2, "" }),
    "matrices": ({ 3, "x" }),
    "mice": ({ 3, "ouse" }),
    "monies": ({ 3, "ey" }),
    "oxen": ({ 2, "" }),
    "pants": ({ 0, "" }),
    "remains": ({ 0, "" }),
    "staves": ({ 3, "ff" }),
    "teeth": ({ 4, "ooth" }),
    "thieves": ({ 3, "f" }),
    "vaxen": ({ 2, "" }),
    "vertices": ({ 4, "ex" }),
    "viruses": ({ 2, "" }),
    "were": ({ 3, "as" }),
  ]),
  /* singular -> plural */
  "pluralize": ([
    "are": ({ "*", "is" }),
    "blues": ({ 0, "" }),
    "bonus": ({ 0, "es" }),
    "bus": ({ 0, "es" }),
    "child": ({ 0, "ren" }),
    "die": ({ 1, "ce" }),
    "foot": ({ 3, "eet" }),
    "goose": ({ 4, "eese" }),
    "have": ({ 2, "s" }),
    "index": ({ 2, "ices" }),
    "lotus": ({ 0, "es" }),
    "louse": ({ 4, "ice" }),
    "matrix": ({ 1, "ces" }),
    "money": ({ 2, "ies" }),
    "mouse": ({ 4, "ice" }),
    "ox": ({ 0, "en" }),
    "pants": ({ 0, "" }),
    "remains": ({ 0, "" }),
    "staff": ({ 2, "ves" }),
    "thief": ({ 1, "ves" }),
    "tooth": ({ 4, "eeth" }),
    "vax": ({ 0, "en" }),
    "vertex": ({ 2, "ices" }),
    "virus": ({ 0, "es" }),
    "was": ({ 2, "ere" }),
  ])
]);

/*
 * Built-in regular-expression rules, keyed by operation tag
 * ("singularize" / "pluralize"). They are consulted only when the word has
 * no entry in the Exceptions mapping.
 *
 * Each rule is a mapping:
 *   "re"     - pattern; the callers wrap it in ^ and $ before matching.
 *   "cut"    - number of characters removed from the end of the word
 *              (or "*" to replace the whole word with the suffix).
 *   "suffix" - text appended after cutting.
 *
 * Rules are tried in array order and the first match wins, so the more
 * specific patterns must come before the general ones.
 *
 * The rule list for a tag is replaced wholesale when f_load_Exceptions()
 * loads that tag's configuration file; these entries are the fallback.
 */
privatev mapping re_Exceptions = ([
  "singularize": ({
    ([ "re": ".*s'", "cut": 1, "suffix": "" ]),
    ([ "re": ".*s's", "cut": 2, "suffix": "" ]),
    /* Words such as 'glass' or 'kiss' are already singular. */
    ([ "re": ".*[aeiou]ss", "cut": 0, "suffix": "" ]),
    /* 'es' endings. */
    ([ "re": ".*[cglpt]es", "cut": 1, "suffix": "" ]),
    ([ "re": ".*ies", "cut": 3, "suffix": "y" ]),
    ([ "re": ".*oes", "cut": 2, "suffix": "" ]),
    /* Vowel before 'ses'. */
    ([ "re": ".*[aeiou]ses", "cut": 1, "suffix": "" ]),
    /* Otherwise no vowel. */
    ([ "re": ".*ses", "cut": 2, "suffix": "" ]),
    /* Vowel before 'ves'. */
    ([ "re": ".*[aeiou]ves", "cut": 1, "suffix": "" ]),
    /* Otherwise no vowel: 'elves' -> 'elf', 'wolves' -> 'wolf'.
     * The 'f' has to be put back after 'ves' is removed.
     */
    ([ "re": ".*ves", "cut": 3, "suffix": "f" ]),
    ([ "re": ".*men", "cut": 2, "suffix": "an" ]),
    /* Has to be last! */
    ([ "re": ".*s", "cut": 1, "suffix": "" ]),
  }),
  "pluralize": ({
    /* 'kiss' -> 'kisses', 'glass' -> 'glasses'. */
    ([ "re": ".*[aeiou]ss", "cut": 0, "suffix": "es" ]),
    ([ "re": ".*[os]", "cut": 0, "suffix": "es" ]),
    ([ "re": ".*[cglpt]e", "cut": 0, "suffix": "s" ]),
    /* Vowel before 'y': 'say' -> 'says'. Must precede the general 'y' rule. */
    ([ "re": ".*[aeiou]y", "cut": 0, "suffix": "s" ]),
    /* Otherwise no vowel: 'pony' -> 'ponies'. */
    ([ "re": ".*y", "cut": 1, "suffix": "ies" ]),
    ([ "re": ".*[aeiou]ve", "cut": 0, "suffix": "s" ]),
    ([ "re": ".*man", "cut": 2, "suffix": "en" ]),
    ([ "re": ".*ma", "cut": 0, "suffix": "s" ]),
    ([ "re": ".*a", "cut": 0, "suffix": "e" ]),
    ([ "re": ".*um", "cut": 2, "suffix": "a" ]),
    ([ "re": ".*on", "cut": 2, "suffix": "a" ]),
    /* Never reached while the '[os]' rule above is present; it yields the
     * same result and is kept in case that rule is ever narrowed.
     */
    ([ "re": ".*us", "cut": 0, "suffix": "es" ]),
  })
]);

/* Tags whose configuration file has already been loaded. */
privatev mixed *ExceptionsLoaded = ({ });

/*
 * f_load_Exceptions()
 *
 * Load an exception file into Exceptions[tag] and re_Exceptions[tag].
 * This function is private to the simul efun object.
 *
 * fname - path of the configuration file.
 * tag   - key under which the rules are stored; defaults to
 *         "__EXCEPTION__" when omitted or empty.
 *
 * Returns 1 if the tag has been loaded (now or on an earlier call),
 * 0 otherwise.
 *
 * The file is read at most once per tag. The existing tables for the tag
 * are only replaced after the file has been read successfully, so a
 * missing or unreadable file leaves the built-in defaults in place.
 *
 * Line format: <word> [<cut> [<suffix>]], separated by spaces or tabs.
 * Blank lines and lines whose first non-blank character is '#' are
 * ignored. A word starting with an upper-case 'R' is a regexp rule and
 * the rest of the field is the pattern.
 */
varargs privatef int f_load_Exceptions(string fname, string tag) {
  if (!stringp(tag) || tag == "") tag = "__EXCEPTION__";

  /* Make sure the private vars are usable before they are consulted. */
  if (!mapp(Exceptions)) Exceptions = ([ ]);
  if (!mapp(re_Exceptions)) re_Exceptions = ([ ]);
  if (!arrayp(ExceptionsLoaded)) ExceptionsLoaded = ({ });

  if (member_array(tag, ExceptionsLoaded) == -1 && file_size(fname) > 0) {
    string contents = read_file(fname);

    if (stringp(contents)) {
      /* Build the new tables locally and commit them in one go. */
      mapping words = ([ ]);
      mixed array patterns = ({ });

      foreach (string line in explode(contents, "\n")) {
        string s_word, s_cut, s_suffix;
        mixed cut = 0;
        string suffix = "";
        int ret;

        /* Trim first so that indented remarks are recognised too. */
        line = trim(line);

        /* Ignore empty lines and '#' remarks. */
        if (line == "" || line[0..0] == "#") continue;

        /* The existing rules rely on a result of 1, 3 or 5 for one, two
         * or three fields respectively.
         */
        ret = sscanf(line, "%s%*([ \t]+)%s%*([ \t]+)%s", s_word, s_cut, s_suffix);

        /* Nothing usable was parsed. */
        if (!stringp(s_word) || s_word == "") continue;

        /* A '*' cut is only meaningful together with a suffix. */
        if (ret >= 5 && s_cut == "*") cut = "*";
        else if (ret >= 3) cut = to_int(s_cut);

        if (ret >= 5) suffix = s_suffix;

        if (s_word[0..0] == "R") {
          /* Regexp rule: order of appearance is preserved. */
          patterns += ({ ([ "re": s_word[1..<1], "cut": cut, "suffix": suffix ]) });
        } else {
          /* Whole-word rule: a later duplicate replaces an earlier one. */
          words[s_word] = ({ cut, suffix });
        }
      }

      /* Replace the tables for this tag and remember that it is loaded. */
      Exceptions[tag] = words;
      re_Exceptions[tag] = patterns;
      ExceptionsLoaded += ({ tag });
    }
  }

  /* Did we load the configuration file? */
  return (member_array(tag, ExceptionsLoaded) != -1);
}

/* Inspection helpers for the exception tables. Each one hands back the
 * result of copy() rather than the table itself.
 */

/* Whole-word exceptions, keyed by tag. */
mapping query_Exceptions() {
  mapping snapshot = copy(Exceptions);

  return snapshot;
}

/* Regular-expression rules, keyed by tag. */
mapping query_re_Exceptions() {
  mapping snapshot = copy(re_Exceptions);

  return snapshot;
}

/* Tags whose configuration file has been loaded. */
mixed array query_ExceptionsLoaded() {
  mixed array snapshot = copy(ExceptionsLoaded);

  return snapshot;
}

/*
 * singularize()
 *
 * Return the singular form of a phrase. Only one word is changed: the last
 * word of the phrase, or the last word before " of " when the phrase has
 * the form 'X of Y'. Everything else is passed through untouched.
 *
 * The word is looked up lower-cased, first in the whole-word exceptions and
 * then in the ordered regexp rules. If the original word was not entirely
 * lower-case, the result is capitalize()d.
 *
 * An empty (or all-whitespace) string yields "".
 */
string singularize(string str) {
  mixed array rule;
  string array words;
  string head = "", word = "", lword = "", tail = "", result = "";

  str = trim(str);

  if (str == "") return "";

  /* Load the configuration file if available; the result is not needed. */
  f_load_Exceptions(S_EXCEPTIONS, "singularize");

  /* Same rules apply as in efun::pluralize. 'X of Y' -> 'X' is singularized. */
  if (sscanf(str, "%s of %s", word, tail) == 2) tail = " of " + tail;
  else word = str;

  /* Split off the last word; whatever precedes it is kept verbatim. */
  words = explode(word, " ");
  word = words[<1];
  lword = lower_case(word);

  if (sizeof(words) > 1) head = implode(words[0..<2], " ");

  /* Whole-word exceptions take precedence. */
  rule = Exceptions["singularize"][lword];

  /* Otherwise the first matching regular expression decides. */
  if (!arrayp(rule)) {
    foreach (mapping entry in re_Exceptions["singularize"]) {
      if (regexp(lword, "^" + entry["re"] + "$")) {
        rule = ({ entry["cut"], entry["suffix"] });
        break;
      }
    }
  }

  /* Apply the rule, if any. */
  if (arrayp(rule)) {
    mixed cut = rule[0];
    string suffix = rule[1];

    if (stringp(cut) && cut == "*") {
      /* The suffix replaces the whole word. */
      result = suffix;
    } else {
      result = lword;

      /* Drop the requested number of trailing letters. */
      if (intp(cut) && cut != 0) result = result[0..strlen(result) - 1 - cut];

      /* A cut of 0 with an empty suffix leaves the word as it is. */
      if (suffix != "") result += suffix;
    }
  }

  /* No rule, or the rule produced nothing: keep the word unchanged. */
  if (result == "") result = lword;

  if (word != lword) result = capitalize(result);
  if (head != "") head += " ";

  /* Stitch everything together. */
  return head + result + tail;
}

/*
 * pluralize()
 *
 * Overrides the driver's efun.
 *
 * Return the plural form of a phrase. Only one word is changed: the last
 * word of the phrase, or the last word before " of " when the phrase has
 * the form 'X of Y'. Everything else is passed through untouched.
 *
 * The word is looked up lower-cased, first in the whole-word exceptions and
 * then in the ordered regexp rules. When nothing applies an 's' is added.
 * If the original word was not entirely lower-case, the result is
 * capitalize()d.
 *
 * An empty (or all-whitespace) string yields "".
 */
string pluralize(string str) {
  mixed array rule;
  string array words;
  string head = "", word = "", lword = "", tail = "", result = "";

  str = trim(str);

  if (str == "") return "";

  /* Load the configuration file if available; the result is not needed. */
  f_load_Exceptions(P_EXCEPTIONS, "pluralize");

  /* Same rules apply as in efun::pluralize. 'X of Y' -> 'X' is pluralized. */
  if (sscanf(str, "%s of %s", word, tail) == 2) tail = " of " + tail;
  else word = str;

  /* Split off the last word; whatever precedes it is kept verbatim. */
  words = explode(word, " ");
  word = words[<1];
  lword = lower_case(word);

  if (sizeof(words) > 1) head = implode(words[0..<2], " ");

  /* Whole-word exceptions take precedence. */
  rule = Exceptions["pluralize"][lword];

  /* Otherwise the first matching regular expression decides. */
  if (!arrayp(rule)) {
    foreach (mapping entry in re_Exceptions["pluralize"]) {
      if (regexp(lword, "^" + entry["re"] + "$")) {
        rule = ({ entry["cut"], entry["suffix"] });
        break;
      }
    }
  }

  /* Apply the rule, if any. */
  if (arrayp(rule)) {
    mixed cut = rule[0];
    string suffix = rule[1];

    if (stringp(cut) && cut == "*") {
      /* The suffix replaces the whole word. */
      result = suffix;
    } else {
      result = lword;

      /* Drop the requested number of trailing letters. */
      if (intp(cut) && cut != 0) result = result[0..strlen(result) - 1 - cut];

      /* A cut of 0 with an empty suffix leaves the word as it is. */
      if (suffix != "") result += suffix;
    }
  }

  /* No rule, or the rule produced nothing: add on an 's'. */
  if (result == "") result = lword + "s";

  if (word != lword) result = capitalize(result);
  if (head != "") head += " ";

  /* Stitch everything together. */
  return head + result + tail;
}

Code: (singularize.conf) [Select]
# Exception file for singularize.
#
# File format
# ===========
# <plural> <cut> <suffix>
#
# Fields
# ======
# plural - The plural word to singularize.
# cut - Number of characters to cut off the end.
#   A '*' indicates change everything.
#   A blank entry indicates no change.
# suffix - The suffix to add on the end.
#   A blank entry indicates no suffix.
#
# If the plural field begins with an upper-case 'R' then the rest
# of the field is a regexp string which will be encased in ^ $
# The order of the expressions IS significant.

# Whitespace is tolerated at the start and end of the lines.

# This list does not have to be in alphabetical order and is by
# no means complete.

# If the whole plural word will change then use a '*' in the cut field.
# For instance 'is' changes to 'are'
is * are
kine * cow
kye * cow

# All 3 of these for octopus are correct.
# See http://en.wikipedia.org/wiki/Octopus#Etymology_and_pluralization
#
# The original and most commonly used.
octopuses 2
# Greek. Correct but rarely used.
octopodes 4 us
# Latin. Incorrect. NB: the efun pluralize returns this for octopus.
octopi 1 us

# Original Latin word for octopus is polypus from the Greek polypous.
# Greek. The correct usage.
polypodes 4 us
# Latin. Incorrect. NB: the efun pluralize returns this for polypus.
polypi 1 us

# Again for platypus. This is a Greek word not Latin.
# Biologist version. Like sheep and fish.
platypus
# Most commonly used by scientists and laypersons.
platypuses 2
# Latin. Incorrect. NB: the efun pluralize returns this for platypus.
platypi 1 us
# Greek form but not well-attested.
platypodes 4 us

# Botanical Latin.
# Other forms of cactus plural are handled elsewhere.
cactuses 2

# These I never knew about.
opera 3 us
sphinges 3 x

# Medical term.
phalanges 3 x
# Military term.
phalanxes 2

# Technically this is correct if a little confusing.
# Second version is the stamp used for molding.
# Remember pluralizing die will return dice.
dice 2 e
dies 1

# Latin or Greek. Can never remember.
bacteria 1 um
consortia 1 um
data 1 um
fora 1 um
media 1 um
millennia 1 um
spectra 1 um
symposia 1 um
corpora 3 us
genera 3 us
loti 1 us
viscera 3 us

# Remember pluralizing virus will return 'viruses'
# The correct form when used for biological infectious agents.
viruses 2
# As in computer virus. Not generally used anymore.
# See http://www.ofb.net/~jlm/virus.html
viri 1 us
virii 2 us

# No change between plural and singular.
benshi
bison
blues
cattle
clothes
deer
fish
glasses
measles
moose
news
otaku
pants
pike
remains
salmon
samurai
series
sheep
species
swine
# thanks
trout

# Balls.
billiard
# The game.
billiards

# No suffix to add on. Just cut off some letters.
appendixes 2
atlases 2
boxen 2
buses 2
bonuses 2
canoes 1
censuses 2
children 3
eyen 1
giraffes 1
hippopotamuses 2
housen 1
hosen 1
insignias 1
lotuses 2
operas 1
oxen 2
prospectuses 2
syllabuses 2
terminuses 2
unixes 2
uteruses 2
vaxen 2

# Cut off some letters and add on the suffix.
indices 4 ex
vertices 4 ex

crises 2 is
testes 2 is

geese 4 oose
mongeese 4 oose

lice 3 ouse
mice 3 ouse

atlantes 4 s
elvii 1 s

appendices 3 x
matrices 3 x
unices 3 x

brethren 6 other
sistren 3 er

panini 1 o
insignia 2 e
feet 3 oot
has 1 ve
monies 3 ey
shoon 2 e
staves 3 ff
thieves 3 f
teeth 4 ooth
were 3 as

# This will try to catch anything else.
# Remember that the order is significant.
# Regex (NOT PCRE) plural search.
R.*s' 1
R.*s's 2
R.*[aeiou]ss

# 'es' endings.
R.*[cglpt]es 1
R.*ies 3 y
R.*oes 2
# Vowel before 'ses'.
R.*[aeiou]ses 1
# Otherwise no vowel.
R.*ses 2
# Vowel before 'ves'.
R.*[aeiou]ves 1
# Otherwise no vowel.
R.*ves 3 f

R.*men 2 an
R.*mata 2
R.*mas 1

R.*da 1 um
R.*ra 1 um
# Needs to be after 'da' and 'ra'.
R.*a 1 on
R.*das 2 um
R.*ras 2 um

R.*ae 1

R.*[tz]i 1 o

# See octopus, polypus and platypus above.
# These will catch some common Latin plurals.
R.*us
R.*ii 2 us
# Needs to be after 'ii'.
R.*i 1 us

# French origin.
R.*eau[sx] 1

# Inuktitut origin.
R.*uit 2 k

# Has to be last!
R.*s 1

Code: (pluralize.conf) [Select]
# Exception file for pluralize.
#
# File format
# ===========
# <singular> <cut> <suffix>
#
# Fields
# ======
# singular - The singular word to pluralize.
# cut - Number of characters to cut off the end.
#   A '*' indicates change everything.
#   A blank entry indicates no change.
# suffix - The suffix to add on the end.
#   A blank entry indicates no suffix.
#
# If the singular field begins with an upper-case 'R' then the rest
# of the field is a regexp string which will be encased in ^ $
# The order of the expressions IS significant.

# Whitespace is tolerated at the start and end of the lines.

# This list does not have to be in alphabetical order and is by
# no means complete.

# If the whole singular word will change then use a '*' in the cut field.
# For instance 'are' changes to 'is'
are * is

# See http://en.wikipedia.org/wiki/Octopus#Etymology_and_pluralization
#
# The original and most commonly used.
octopus 0 es

# Original Latin word for octopus is polypus from the Greek polypous.
# Greek. The correct usage.
polypus 2 odes

# Again for platypus. This is a Greek word not Latin.
# Most commonly used by scientists and laypersons.
platypus 0 es

# Botanical Latin.
cactus

# These I never knew about.
opus 2 era
sphinx 1 ges

# Pick which one you want to use.
# Military term.
phalanx 0 es
# Medical term.
# phalanx 1 ges

# Replace with the second form if that is required.
die 1 ce
# die 0 s

# Latin or Greek. Can never remember.
consortium 2 a
datum 2 a
forum 2 a
medium 2 a
millennium 2 a
spectrum 2 a
symposium 2 a
corpus 2 ora
genus 2 era
viscus 2 era

# The correct form when used for biological infectious agents.
# See http://www.ofb.net/~jlm/virus.html
virus 0 es

# Joke Latin form.
elvis 1 i

# No change between singular and plural.
benshi
bison
blues
cattle
clothes
deer
fish
glasses
measles
moose
news
otaku
pants
pike
remains
salmon
samurai
series
sheep
species
swine
thanks
trout

# Balls.
billiard
# The game.
billiards

# Just add a suffix.
atlas 0 es
box 0 en
bus 0 es
bonus 0 es
census 0 es
child 0 ren
giraffe 0 s
hippopotamus 0 es
human 0 s
igloo 0 s
kangaroo 0 s
lotus 0 es
ox 0 en
pizza 0 s
prospectus 0 es
syllabus 0 es
terminus 0 es
unix 0 es
uterus 0 es
vax 0 en

# Cut off some letters and add a suffix.
appendix 1 ces
matrix 1 ces

index 2 ices
vertex 2 ices

crisis 2 es
testis 2 es

goose 4 eese

louse 4 ice
mouse 4 ice

staff 2 ves
thief 1 ves

panino 1 i
opera 0 s
insigne 1 ia
insignia 0 s
foot 3 eet
have 2 s
money 2 ies
tooth 4 eeth
was 2 ere

# This will try to catch anything else.
# Remember that the order is significant.
# Regex (NOT PCRE) singular search.

R.*[aeiou]ss 0 es
R.*[tz]o 1 i
R.*[os] 0 es
R.*[cglpt]e 0 s
R.*[aeiou]y 0 s
R.*y 1 ies
R.*[aeiou]ve 0 s
R.*man 2 en
R.*ma 0 s
R.*a 0 e
R.*um 2 a
R.*on 2 a
R.*us 0 es
R.*eau 0 x
R.*uk 1 it

Tricky

Offline z993126

  • BFF
  • ***
  • Posts: 128
    • View Profile
Re: Singularize and Pluralize sefuns...
« Reply #1 on: February 07, 2012, 03:35:17 PM »
Here's some more exception-rules:

nouns that remain the same between singular and plural:  apparel, cod, corps, halibut, means, offspring, perch, pliers, scissors, tongs, tuna, tweezers

plural of box is 'boxes', not 'boxen', general-rule:  words ending in 'x' add 'es' to pluralize (e.g. foxes, boxes, poxes, sexes...)
words ending in 'z' get 'es' endings, too; blintzes, waltzes, quizzes ('z' doubled due to...what the heck rule is that?)
words ending in [vowel]ss get 'es' ending; kisses, misses, losses, wusses
rule for '-ves' ending is tricky; if 'f' or 'fe' at word ending is preceded by a *single* vowel or any consonant (except 'f'), change the 'f' or 'fe' to 'ves'

cul-de-sac->culs-de-sac
elf->elves
focus->foci
graffito->graffiti
knife->knives
man-at-arms->men-at-arms
man-of-war->men-of-war
ninja->shinobi ( http://www.hintsandthings.com/library/plurals.htm mentions this one )
person->people
radius->radii
shelf->shelves
wife->wives

problematic: 'attorney general' should be 'attorneys general' but it's got a whitespace...

for some reason the pluralize() sefun isn't working at all for me.  -_-  should mudconfig defaultparse be set to yes or no, and what else needs changing?

Offline z993126

  • BFF
  • ***
  • Posts: 128
    • View Profile
Re: Singularize and Pluralize sefuns...
« Reply #2 on: February 07, 2012, 09:34:00 PM »
...blasted typos.  had an extra 'e' in my declaration to include the plurals.conf :P

Offline quixadhal

  • BFF
  • ***
  • Posts: 642
    • View Profile
    • WileyMUD
Re: Singularize and Pluralize sefuns...
« Reply #3 on: February 08, 2012, 11:53:27 AM »
Don't forget to put an #ifdef BRITISH around places where things are different. :)

Offline z993126

  • BFF
  • ***
  • Posts: 128
    • View Profile
Re: Singularize and Pluralize sefuns...
« Reply #4 on: February 08, 2012, 12:03:01 PM »
Rather more comprehensive listing at http://en.wikipedia.org/wiki/English_plural

Offline Tricky

  • BFF
  • ***
  • Posts: 189
  • I like what I code and I code what I like!
    • View Profile
Re: Singularize and Pluralize sefuns...
« Reply #5 on: February 10, 2012, 07:00:07 PM »
nouns that remain the same between singular and plural:  apparel, cod, corps, halibut, means, offspring, perch, pliers, scissors, tongs, tuna, tweezers
Note the fish... Generally fish species are both plural and singular.

plural of box is 'boxes', not 'boxen', general-rule:  words ending in 'x' add 'es' to pluralize (e.g. foxes, boxes, poxes, sexes...)
Forgot I had box -> boxen in there. It's generally a joke plural form on the lines of vax -> vaxen.

words ending in 'z' get 'es' endings, too; blintzes, waltzes, quizzes ('z' doubled due to...what the heck rule is that?)
Not sure but maybe words that have a vowel before 'z' get pluralized with 'zes'.

problematic: 'attorney general' should be 'attorneys general' but it's got a whitespace...
I've got an idea for that... will look into it.

Tricky

Offline z993126

  • BFF
  • ***
  • Posts: 128
    • View Profile
Re: Singularize and Pluralize sefuns...
« Reply #6 on: February 12, 2012, 05:08:47 PM »
additional rule:  words ending in 'tch' get 'es' ending (watch->watches, batch->batches, etc)
(words just ending in 'ch' get normal 's' ending; loch->lochs, lich->lichs, etc)

Offline quixadhal

  • BFF
  • ***
  • Posts: 642
    • View Profile
    • WileyMUD
Re: Singularize and Pluralize sefuns...
« Reply #7 on: February 12, 2012, 11:37:11 PM »
Shouldn't that be lich -> lichen?
*grin*

Offline z993126

  • BFF
  • ***
  • Posts: 128
    • View Profile
Re: Singularize and Pluralize sefuns...
« Reply #8 on: February 23, 2012, 12:37:11 AM »
So's it was discovered that 'say' gets piped through this at some point and pluralize("say") returns "saies", not "says".  So an exception to add.

Offline z993126

  • BFF
  • ***
  • Posts: 128
    • View Profile
Re: Singularize and Pluralize sefuns...
« Reply #9 on: February 23, 2012, 05:02:19 AM »
correction:  most words ending in 'ch' get 'es' ending; just 's' is the exception:  lichs, lochs, stomachs, monarchs, bachs, epochs, machs, eunuchs, pentarch, R.*tych, yech, psych, synch, tech, R.*tech, R.*iarch, anarchs, oligarchs

Offline z993126

  • BFF
  • ***
  • Posts: 128
    • View Profile
Re: Singularize and Pluralize sefuns...
« Reply #10 on: February 25, 2012, 01:40:11 AM »
Addendum to the say pluralization note:  exception should be
money         2   ies
R.*[aeiou]y      0   s
R.*y         1   ies

Offline Tricky

  • BFF
  • ***
  • Posts: 189
  • I like what I code and I code what I like!
    • View Profile
Re: Singularize and Pluralize sefuns...
« Reply #11 on: February 25, 2012, 01:23:37 PM »
correction:  most words ending in 'ch' get 'es' ending; just 's' is the exception:  lichs, lochs, stomachs, monarchs, bachs, epochs, machs, eunuchs, pentarch, R.*tych, yech, psych, synch, tech, R.*tech, R.*iarch, anarchs, oligarchs

Stop trying to complicate things. You just need 2 rules to catch the majority of words.

Code: [Select]
R.*tch          0       es
R.*ch           0       s

... in that order.

Those two rules will catch the above words you have quoted and practically anything else. Create exceptions for, well, exceptions.

Tricky

Offline z993126

  • BFF
  • ***
  • Posts: 128
    • View Profile
Re: Singularize and Pluralize sefuns...
« Reply #12 on: February 26, 2012, 09:59:15 AM »
Nope.  '-ches' is the norm.  Beaches, leeches, teaches, peaches...the exceptions I listed are where the 'ch' is a hard 'k' sound, and are far less common.