diff -urN jbofihe-0.37/NEWS jbofihe-0.38/NEWS
--- jbofihe-0.37/NEWS	Wed Aug  8 22:41:46 2001
+++ jbofihe-0.38/NEWS	Sat Sep 15 23:10:36 2001
@@ -1,3 +1,12 @@
+New in version 0.38
+===================
+
+jbofihe/vlatai
+--------------
+
+- revisions to the handling of commas and vowel groupings in the word category
+  and validity algorithm.
+
 New in version 0.37
 ===================
 
diff -urN jbofihe-0.37/mk_vfetab.pl jbofihe-0.38/mk_vfetab.pl
--- jbofihe-0.37/mk_vfetab.pl	Wed Aug  8 22:41:48 2001
+++ jbofihe-0.38/mk_vfetab.pl	Sat Sep 15 23:10:40 2001
@@ -1,6 +1,6 @@
 #!/usr/bin/env perl
 
-# $Header: /cvs/src/jbofihe/mk_vfetab.pl,v 1.6 2001/03/21 06:55:21 richard Exp $
+# $Header: /cvs/src/jbofihe/mk_vfetab.pl,v 1.7 2001/09/02 21:21:48 richard Exp $
 
 # Create tables for the second front-end stage of the morphology algorithm.
 # This stage is concerned with tracking the validity of vowel clusters,
@@ -10,9 +10,9 @@
 $VTOK_Y = 3; # must agree with coding for y in consonant FSM
 $VTOK_VV = 16; # ai/au/ei/oi valid anywhere
 $VTOK_VX = 17; # [iu][aeiou] - vowel extended
-$VTOK_VY = 18; # combinations involving y where valid [only in cmene]
-$VTOK_YY = 19; # two copies of y adjacent with no separation (only in hesitation string)
-$VTOK_YCY = 20; # two copies of y with a comma between
+$VTOK_VO = 18; # [aeiou][aeiou] pairs other than the 14 covered by VV and VX above
+$VTOK_VY = 19; # combinations involving y where valid [only in cmene]
+$VTOK_YY = 20; # two copies of y adjacent with no separation (only in hesitation string)
 $VTOK_UNK = 0;
 
 # Note, C encodes all consonants + apostrophe
@@ -31,22 +31,14 @@
     $r = $VTOK_VV;
 } elsif ($x =~ /.[iu][aeiou]/) {
     $r = $VTOK_VX;
-} elsif ($x =~ /[aeiou],[aeiou]/) {
-    # Conclusion of egroups discussion in Jan 2001 : v,v is treated
-    # as equivalent to v'v, so is valid in any type of word.
-    $r = $VTOK_VV;
-} elsif ($x =~ /C,[aeiou]/) {
-    # For symmetry with V,C and because it seems reasonable for
-    # words like datnlril,odio
-    $r = $VTOK_V;
-} elsif ($x =~ /.[iu]y/ || $x =~ /y,[aeiou]/ || $x =~ /[aeiou],y/) {
+} elsif ($x =~ /.[aeiou][aeiou]/) {
+    $r = $VTOK_VO;
+} elsif ($x =~ /.[iu]y/) {
     $r = $VTOK_VY;
 } elsif ($x =~ /.Cy/) {
     $r = $VTOK_Y;
 } elsif ($x =~ /[Cy]yy/) {
     $r = $VTOK_YY;
-} elsif ($x =~ /y,y/) {
-    $r = $VTOK_YCY;
 } elsif ($x =~ /.C[aeiou]/) {
     $r = $VTOK_V;
 } else {
diff -urN jbofihe-0.37/morf.c jbofihe-0.38/morf.c
--- jbofihe-0.37/morf.c	Wed Aug  8 22:41:48 2001
+++ jbofihe-0.38/morf.c	Sat Sep 15 23:10:40 2001
@@ -1,5 +1,5 @@
 /***************************************
-  $Header: /cvs/src/jbofihe/morf.c,v 1.29 2001/07/29 21:56:35 richard Exp $
+  $Header: /cvs/src/jbofihe/morf.c,v 1.30 2001/09/02 21:21:48 richard Exp $
 
   Carry out the morphology functions (hence the name) - take consecutive
   strings of non-whitespace from the input stream and determine what kind of
@@ -95,8 +95,8 @@
 {
   "UNK", "V", "APOS", "Y", "R", "N", "C",
   "NR", "CI", "CSI", "CP", "CS", "CN",
-  "H", "HS", "BT", "VV", "VX", "VY", "YY",
-  "Y,Y"
+  "H", "HS", "BT", "VV", "VX", "VO", 
+  "VY", "YY"
 };
 /*}}}*/
 static char *actnam[] =/*{{{*/
@@ -240,8 +240,6 @@
   enum processed_category result;
   int had_uppercase=0;
   int letter_uppercase;
-  int ended_with_comma=0;
-  int started_with_comma=0;
   MorfType ext_result;
 
   /* Gather info in a local copy, in case client doesn't want it.
@@ -276,24 +274,21 @@
 #endif
 
   p = s;
-  started_with_comma = (*p == ',');
 
   /*{{{  Main per-character loop */
   while (*p) {
     c = *p;
 
-    /* The vowel cluster state machine evolves on all input chars including
-       commas.  (The normal consonant state machine doesn't bother about
-       commas) */
-
-    vsm = ((vsm & 077) << 3) | vmapchar[(unsigned char) c & 0xff];
-    
     /* If char is a comma, just advance now. */
     if (c == ',') {
       p++;
       continue;
     }
     
+    /* Commas are now discarded, even for checking vowel cluster validity */
+
+    vsm = ((vsm & 077) << 3) | vmapchar[(unsigned char) c & 0xff];
+    
     G = (unsigned int) mapchar[(unsigned char) c & 0xff];
     letter_uppercase = (G >> 7) & 1;
     had_uppercase |= letter_uppercase;
@@ -397,11 +392,7 @@
   }
   /*}}}*/
 
-  if (!*p && !(vsm & 0x7)) { /* last char was a comma */
-    ended_with_comma = 1;
-  }
-  
-  if ((state < 0) || started_with_comma || ended_with_comma || (morf_exitval[state] == R_UNKNOWN)) {
+  if ((state < 0) || (morf_exitval[state] == R_UNKNOWN)) {
     result = W_UNKNOWN;
     ext_result = MT_BOGUS;
     decrement = 0;
diff -urN jbofihe-0.37/morf_nfa.in jbofihe-0.38/morf_nfa.in
--- jbofihe-0.37/morf_nfa.in	Wed Aug  8 22:41:48 2001
+++ jbofihe-0.38/morf_nfa.in	Sat Sep 15 23:10:40 2001
@@ -12,7 +12,7 @@
 # - rigorously checks the word form for errors (bad clusters, y where not
 #   required, bad hyphenation after initial CVV rafsi, bad vowel pairing etc)
 #
-# $Header: /cvs/src/jbofihe/morf_nfa.in,v 1.27 2001/07/29 21:56:36 richard Exp $
+# $Header: /cvs/src/jbofihe/morf_nfa.in,v 1.28 2001/09/02 21:21:48 richard Exp $
 #
 #
 # Copyright (C) Richard P. Curnow  1998-2001
@@ -42,7 +42,7 @@
 # Declare all symbols in same order as lexer (lexer returns (0 .. whatever)
 # when it recognizes the corresponding token)
 
-Tokens UNK V APOS Y R N C NR CI CSI CP CS CN H HS BT VV VX VY YY YCY
+Tokens UNK V APOS Y R N C NR CI CSI CP CS CN H HS BT VV VX VO VY YY
 
 # Token meanings are as follows
 # UNK   : Unknown character
@@ -65,10 +65,10 @@
 #         of v,v pattern. [Comma treated the same as apostrophe between vowels]
 # VX    : (extended) 2nd vowel of [iu][aeiou] (allowed as single VV cmavo,
 #         and in fu'ivla & cmene)
+# VO    : other vowel pairs (aa,ae,ao,ea,ee,eo,eu,oa,oe,oo,ou)
 # VY    : vowel pair forms involving y, maybe with a comma between (valid only
 #         in cmene)
 # YY    : 2 copies of the letter y adjacent to each other with no separation
-# YCY   : 2 copies of the letter y with a comma separating them.
 
 # Notes
 # - pairs & triples may have y within them.  This allows this 'grammar' to
@@ -93,6 +93,11 @@
 Abbrev CNR = C|N|R
 Abbrev LCI = CI|CSI
 Abbrev LCP = CI|CSI|CP|CS|NR
+Abbrev FVV = VV|VX|VO
+Abbrev FC = C|N|R
+Abbrev FCP = CI|CP|H
+Abbrev FCS = CSI|CS|HS|NR
+
 
 ###########################################################
 # Subcomponents for lujvo matching
@@ -435,6 +440,48 @@
 ENDBLOCK
 #}}}
 ###########################################################
+#{{{ BLOCK LUJVO_NO_Y_BAD_VOWELS
+# This block picks out words that have the consonant structure of a lujvo
+# but may contain invalid vowel pairs.  Such words have to be filtered out
+# of the stage-IV fu'ivla set later on.  Hence a big simplification is
+# possible: lujvo forms containing 'y' are not considered here.
+BLOCK LUJVO_NO_Y_BAD_VOWELS
+
+    STATE c
+        LCP ; V ; FVV       -> v, exit
+        LCP ; V ; APOS ; V  -> v, exit
+        LCP ; V ; CNR       -> c
+        LCP ; LCI ; V       -> v, exit
+
+    STATE v
+        CNR ; V ; FVV       -> v, exit
+        CNR ; V ; APOS ; V  -> v, exit
+        CNR ; LCI ; V       -> v, exit
+        CNR ; V ; CNR       -> c
+
+    STATE cvv1
+        R ; LCP ; V ; FVV       -> v, exit
+        R ; LCP ; V ; APOS ; V  -> v, exit
+        R ; LCP ; V ; CNR       -> c
+        R ; LCP ; LCI ; V       -> v, exit
+        N ; NR  ; V ; FVV       -> v, exit
+        N ; NR  ; V ; APOS ; V  -> v, exit
+        N ; NR  ; V ; CNR       -> c
+
+    STATE exit = TAG_LUJVO_NO_Y_BAD_VOWELS
+
+    STATE in
+        CNR ; V ; FVV       -> cvv1
+        CNR ; V ; APOS ; V  -> cvv1
+        CNR ; LCI ; V       -> v
+        CNR ; V ; CNR       -> c
+
+        CNR ; V ; FVV      ; CNR ; LCI ; V -> exit
+        CNR ; V ; APOS ; V ; CNR ; LCI ; V -> exit
+
+ENDBLOCK
+#}}}
+###########################################################
 #{{{  BLOCK CMAVOSEQ
 BLOCK CMAVOSEQ
 
@@ -547,14 +594,14 @@
 
     STATE in
         V                    -> ex_single
-        V ; VV|VX            -> ex_single
-        V ; VV|VX ; VV|VX    -> main
+        V ; FVV              -> ex_single
+        V ; FVV   ; FVV      -> main
         V ; APOS  ; V        -> main
-        V ; VV|VX ; APOS ; V -> main
+        V ; FVV   ; APOS ; V -> main
 
     STATE main
         APOS ; V  -> main
-        VV|VX     -> main
+        FVV       -> main
                   -> ex_multi
 
 ENDBLOCK
@@ -573,10 +620,6 @@
 ENDBLOCK
 #}}}
 
-Abbrev FC = C|N|R
-Abbrev FCP = CI|CP|H
-Abbrev FCS = CSI|CS|HS|NR
-
 #{{{  BLOCK FV_INTERNAL_CONS_GROUP
 BLOCK FV_INTERNAL_CONS_GROUP
 
@@ -629,12 +672,12 @@
         V        -> v1
 
     STATE v1
-        VV|VX    -> v2
+        FVV      -> v2
         APOS ; V -> v2
                  -> cons.in_req_clus
 
     STATE v2
-        VV|VX    -> v3
+        FVV      -> v3
         APOS ; V -> v3
                  -> cons.in_req_clus
 
@@ -667,7 +710,7 @@
         V        -> v1
 
     STATE v1
-        VV|VX    -> v2
+        FVV      -> v2
         APOS ; V -> v2
                  -> cons.in_req_clus
                  
@@ -881,7 +924,7 @@
                  -> exit
 
     STATE v
-        VV|VX|VY -> v
+        FVV  |VY -> v
         CNR      -> c
         APOS     -> a
 
@@ -891,9 +934,9 @@
     STATE y
         APOS           -> a
         VY             -> v
+        YY             -> y
         LCP|HS|H|CN|BT -> c
         CP ; BT        -> c # deal with nytc, nyts, nydj, nydz
-        YCY            -> y
     
     STATE exit
         = TAG_CMENE
@@ -905,14 +948,17 @@
     gismu : GISMU
     lujvo : LUJVO
     cultural_lujvo : CULTURAL_LUJVO
+    lujvo_no_y_bad_vowels : LUJVO_NO_Y_BAD_VOWELS
     cms : CMAVOSEQ
     cmene : CMENE
     fuivla : FUIVLA
     
     STATE in                  -> gismu.in, lujvo.in, cultural_lujvo.in,
-                                 fuivla.in_no_prefix, cms.in, cmene.in
+                                 fuivla.in_no_prefix, cms.in, cmene.in,
+                                 lujvo_no_y_bad_vowels.in
     STATE cms.exit_prefixable -> gismu.in, lujvo.in, cultural_lujvo.in,
-                                 fuivla.in_prefixed
+                                 fuivla.in_prefixed,
+                                 lujvo_no_y_bad_vowels.in
 
     #####
     STATE in     # ENTRY STATE NAMED LAST IN FILE
@@ -982,8 +1028,8 @@
 Result S_FUIVLA_1 &  TAG_X_STAGE3_LONG -> R_X_STAGE3_1
 Result S_FUIVLA_1 &  TAG_X_STAGE3_CVC & !TAG_X_STAGE3_LONG -> R_X_STAGE3_1_CVC
 
-Result S_FUIVLA_0 & ~ANY_STAGE3 -> R_STAGE4_0
-Result S_FUIVLA_1 & ~ANY_STAGE3 -> R_STAGE4_1
+Result S_FUIVLA_0 & ~ANY_STAGE3 & ~TAG_LUJVO_NO_Y_BAD_VOWELS -> R_STAGE4_0
+Result S_FUIVLA_1 & ~ANY_STAGE3 & ~TAG_LUJVO_NO_Y_BAD_VOWELS -> R_STAGE4_1
 
 # All of the following are disjoint with each other and with any
 # of the earlier ones
diff -urN jbofihe-0.37/version.txt jbofihe-0.38/version.txt
--- jbofihe-0.37/version.txt	Wed Aug  8 22:41:51 2001
+++ jbofihe-0.38/version.txt	Sat Sep 15 23:10:43 2001
@@ -1 +1 @@
-char version_string[] = "$Name: V0_37 $";
+char version_string[] = "$Name: V0_38 $";

