diff -urN jbofihe-0.36/LICINS jbofihe-0.37/LICINS
--- jbofihe-0.36/LICINS	Mon Mar 26 22:03:04 2001
+++ jbofihe-0.37/LICINS	Thu Jan  1 01:00:00 1970
@@ -1,14 +0,0 @@
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of version 2 of the GNU General Public License as
-published by the Free Software Foundation.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
-
diff -urN jbofihe-0.36/Makefile.dos jbofihe-0.37/Makefile.dos
--- jbofihe-0.36/Makefile.dos	Mon Mar 26 22:03:04 2001
+++ jbofihe-0.37/Makefile.dos	Wed Aug  8 22:41:46 2001
@@ -1,4 +1,4 @@
-# $Header: /cvs/src/jbofihe/Makefile.dos,v 1.7 2001/03/20 21:27:23 richard Exp $
+# $Header: /cvs/src/jbofihe/Makefile.dos,v 1.9 2001/07/31 20:46:01 richard Exp $
 #
 # Makefile for parser/glosser using DJGPP on DOS
 #
@@ -29,7 +29,8 @@
 	properties.o conversion.o terms.o memory.o tenses.o \
 	output.o textout.o htmlout.o connect.o stag.o latexblk.o \
 	relative.o textblk.o errorscan.o canonluj.o lujvofns.o \
-	erasure.o rpc_full.o morf.o tracebk.o elide.o dictaccs.o
+	erasure.o rpc_full.o morf.o morf_dfa.o bccheck.o \
+	tracebk.o elide.o dictaccs.o
 
 SRCS2 = $(OBJS2:%.o=%.c)
 
@@ -59,8 +60,11 @@
 cmafihe : $(CM_OBJS)
 	$(CC) $(CFLAGS) -o cmafihe $(CM_OBJS)
 
-vlatai : morf.c morf.h morf_dfa.c morf_enc.c morf_lex.c morfvlex.c
-	$(CC) -o vlatai -DTEST_MORF $(CFLAGS) morf.c
+vlatai.o : morf.c morf.h morf_enc.c morf_lex.c morfvlex.c
+	$(CC) $(CFLAGS) -c -o vlatai.o -DTEST_MORF morf.c
+    
+vlatai : vlatai.o morf_dfa.o bccheck.o canonluj.o
+	$(CC) -o vlatai -DTEST_MORF $(CFLAGS) vlatai.o morf_dfa.o bccheck.o canonluj.o
 
 jvocuhadju : jvocuhadju.o lujvofns.o
 
diff -urN jbofihe-0.36/Makefile.in jbofihe-0.37/Makefile.in
--- jbofihe-0.36/Makefile.in	Mon Mar 26 22:03:04 2001
+++ jbofihe-0.37/Makefile.in	Wed Aug  8 22:41:46 2001
@@ -1,4 +1,4 @@
-# $Header: /cvs/src/jbofihe/Makefile.in,v 1.50 2001/03/21 23:04:00 richard Exp $
+# $Header: /cvs/src/jbofihe/Makefile.in,v 1.60 2001/08/03 20:39:54 richard Exp $
 #
 # Makefile for parser/glosser
 #
@@ -27,6 +27,7 @@
 DICTNAME=smujmaji.dat
 DICTIONARY=$(LIBDIR)/$(DICTNAME)
 MANDIR=$(PREFIX)/man/man1
+CMAFIHE_LDOPTS=@@CMAFIHE_LDOPTS@@
 
 CC=gcc
 CFLAGS= @@OPTDEBUG@@ @@DEFINES@@ -DDEFAULT_DICTIONARY=\"$(DICTIONARY)\"
@@ -36,7 +37,8 @@
         properties.o conversion.o terms.o memory.o tenses.o \
         output.o textout.o htmlout.o connect.o stag.o latexblk.o \
         relative.o textblk.o errorscan.o canonluj.o lujvofns.o \
-        erasure.o rpc_full.o morf.o tracebk.o elide.o dictaccs.o
+        erasure.o rpc_full.o morf.o morf_dfa.o bccheck.o \
+        tracebk.o elide.o dictaccs.o
 
 SRCS2 = $(OBJS2:%.o=%.c)
 
@@ -131,8 +133,11 @@
 smujajgau.o : smujajgau.c
 	$(CC) $(CFLAGS) -c smujajgau.c
 
-vlatai : morf.c morf.h morf_dfa.c morfnc_dfa.c morf_enc.c morf_lex.c morfvlex.c
-	$(CC) -o vlatai -DTEST_MORF $(CFLAGS) morf.c
+vlatai.o : morf.c morf.h morf_enc.c morf_lex.c morfvlex.c
+	$(CC) $(CFLAGS) -c -o vlatai.o -DTEST_MORF morf.c
+    
+vlatai : vlatai.o morf_dfa.o bccheck.o canonluj.o
+	$(CC) -o vlatai -DTEST_MORF $(CFLAGS) vlatai.o morf_dfa.o bccheck.o canonluj.o
 
 morf_lex.c : mk_fetab.pl
 	perl mk_fetab.pl > morf_lex.c 2>morf_lex.err
@@ -143,16 +148,21 @@
 morf_enc.c : mk_enctab.pl
 	perl mk_enctab.pl > morf_enc.c 2>morf_enc.err
 
-morf_dfa.c : morf_nfa.in n2d/n2d
-	n2d/n2d -v -r morf_dfa.report < morf_nfa.in > morf_dfa.c
+morf_dfa.c : morf_nfa.in dfasyn/dfasyn
+	dfasyn/dfasyn -v -r morf_dfa.report morf_nfa.in -o morf_dfa.c
+
+bctest : bccheck.c bctables.c
+	$(CC) $(CFLAGS) -o bctest -DTEST=1 bccheck.c
 
-morfnc_dfa.c : morf_nfa.in n2d/n2d
-	grep -v 'CULTURAL' < morf_nfa.in | n2d/n2d -v -r morfnc_dfa.report > morfnc_dfa.c
+bccheck.o : bccheck.c bctables.c
 
-n2d/n2d:
-	(cd n2d && make all)
+bctables.c : bctables.in dfasyn/dfasyn
+	dfasyn/dfasyn -v -r bctables.report < bctables.in > bctables.c
+# Build the DFA generator in its own directory; $(MAKE) passes on -j/-n and flags.
+dfasyn/dfasyn:
+	(cd dfasyn && $(MAKE) all)
 
-morf.o : morf.c morf.h morf_dfa.c morfnc_dfa.c morf_enc.c morf_lex.c morfvlex.c
+morf.o : morf.c morf.h morf_dfa.c morf_enc.c morf_lex.c morfvlex.c
 
 txtman: jbofihe.txt cmafihe.txt smujajgau.txt jvocuhadju.txt vlatai.txt
 
@@ -182,7 +192,7 @@
 	./smujajgau $(DICTNAME) extradict places.dat patterns
 
 cmafihe : $(CM_OBJS)
-	$(CC) $(CFLAGS) -o cmafihe $(CM_OBJS)
+	$(CC) $(CFLAGS)  $(CMAFIHE_LDOPTS) -o cmafihe $(CM_OBJS)
 
 cm_scan.c : cm_scan.l
 	flex -t cm_scan.l > cm_scan.c
@@ -192,6 +202,9 @@
 
 jvocuhadju.o : jvocuhadju.c version.h
 
+test_ivl : lujvofns.c
+	$(CC) $(CFLAGS) -o test_ivl -DTEST_IS_VALID_LUJVO lujvofns.c
+
 smujajgau.o : smujajgau.c version.h
 
 canonluj.o : canonluj.inc
@@ -200,17 +213,16 @@
 	perl canonluj.pl < reduced_gismu > canonluj.inc
 
 clean:
-	-rm *.output \
-        *.tab.c *.tab.o *_tab.c *_tab.o rpc_full.c \
-        *.o \
+	-(cd dfasyn && $(MAKE) clean)
+	-rm *.output *.tab.c *_tab.c rpc_full.c *.o \
         jbofihe cmafihe smujajgau vlatai jvocuhadju \
         *.dict uncom.c uncom.o uncom \
         morf_lex.c morfvlex.c morf_enc.c morf*_dfa.c \
         elitabs.c trcftabs.c trctabs.c \
         stag.c stag.h stag.tab.c stag.tab.h \
-        rpc2x_nc.y rpc2x_act.y rpc_full_nc.y rpc_full_act.y \
+        rpc2x_nc.y rpc2x_act.y rpc2x_full_nc.y rpc2x_full_act.y \
         canonluj.inc version.h \
-        nonterm.*
+        nonterm.c nonterm.h morf_dfa.report morf_lex.err morf_enc.err bctables.c bctables.report bctest test_ivl
 
 # Specify in this perverse way so that the $-Name construction doesn't get replaced on checkout!
 
@@ -270,7 +282,7 @@
 lujvofns.o: lujvofns.c lujvofns.h
 erasure.o: erasure.c nodes.h nonterm.h rpc_tab.h functions.h output.h
 rpc_full.o: rpc_full.c nodes.h nonterm.h functions.h output.h
-morf.o: morf.c morf.h morf_lex.c morfvlex.c morf_dfa.c morfnc_dfa.c \
+morf.o: morf.c morf.h morf_lex.c morfvlex.c morf_dfa.c \
  morf_enc.c
 tracebk.o: tracebk.c trctabs.c trcftabs.c
 elide.o: elide.c nodes.h nonterm.h rpc_tab.h cmavotab.h functions.h \
diff -urN jbofihe-0.36/NEWS jbofihe-0.37/NEWS
--- jbofihe-0.36/NEWS	Mon Mar 26 22:03:04 2001
+++ jbofihe-0.37/NEWS	Wed Aug  8 22:41:46 2001
@@ -1,3 +1,75 @@
+New in version 0.37
+===================
+
+general
+=======
+- Include contributed Emacs Lisp functions for driving jbofi'e.
+- Add fold markers in some of the source code for editing with VIM version 6
+
+jbofihe
+=======
+- Improve translations of vowels & other words followed by "bu".  (Only a
+  partial list is included in the extradict file.)
+- Fix crashes which occurred when tagging term places when the selbri was "jai"
+  followed by something other than a brivla.
+- Add entries for mu'e, pu'u, za'i, zu'o to the 'patterns' file for use in
+  glossing unknown lujvo.
+- Integrate recognition of cultural rafsi in lujvo into the ordinary state
+  machine (so now 1 integrated state machine instead of 2 separate ones).
+  Consequent reduction in table sizes.
+- Recognize stage-3-like fu'ivla with >1 short rafsi before the hyphen and
+  gloss accordingly if the word isn't in the dictionary 'as-is'.
+- New more robust checking for bad cmene (containing la, doi etc)
+- Fix bug : missing 'break' statements causing fall-through and hence bogus
+  access on another branch of a union.
+- Deprecate -g command option from man page.
+- Fix glossing of the words for 10**X (the asterisks were treated as
+  meta-characters by the glosser)
+- Add support for experimental cmavo "xa'o" (selma'o ZAhO, opposite of "za'o")
+
+vlatai
+======
+- For stage 3 fu'ivla, show forward slashes to split the rafsi prefix from the
+  hyphen and the hyphen from the word tail.
+- Recognize an extension to stage 3 fu'ivla, with >1 rafsi before the hyphen
+- Cultural rafsi scanning improvement as for jbofihe.  Remove -cr command line
+  argument.
+- Only show exit value of state machine in verbose (-v) mode.
+- Detect and show bad cmene forms (la/lai/doi break-up)
+- New option '-el' to show a lujvo broken into its component words.
+- Fixed some more obscure faults with words containing patterns like "nytc" and
+  "ntyc".
+
+cmafihe
+=======
+- Include contribution from Björn Gohla to output GNUStep property lists for
+  the vocabulary found in the input.
+
+jvocuhadju
+==========
+- Complete rewrite of the lujvo form checker used to detect tosmabru failures.
+  The old one was completely broken.
+- Added the script testing/jvocipra.pl, which couples jvocuhadju and vlatai in
+  a back-to-back configuration for mutual testing.
+- Add '-l' switch to try long rafsi even if a short one is available.
+- Add '-a' switch to show all possible lujvo forms, not just the 8 most
+  preferred ones.
+
+lexer generator (dfasyn/*)
+===============
+- Add handling of 'state attributes' (to allow actions to be run when automaton
+  is in DFA states corresponding to particular NFA states)
+- Fix bug in NFA compressor, where algorithm could run off the end of an array.
+- Fix bug in DFA compressor (was functionally correct but over-pessimistic
+  about combining states)
+- Improve command line options
+- Add %{ ... %} text inclusion facility
+- Allow exit and attribute table element types to be defined by input grammar
+- Add 'early exit' capability for DFA states that will immediately exit (=>
+  reduced table sizes + more state merging)
+- Provide next_state functions in the output file.
+- Renamed "dfasyn" with a view to it being made a project in its own right.
+
 New in version 0.36
 ===================
 
diff -urN jbofihe-0.36/README.PLIST jbofihe-0.37/README.PLIST
--- jbofihe-0.36/README.PLIST	Thu Jan  1 01:00:00 1970
+++ jbofihe-0.37/README.PLIST	Wed Aug  8 22:41:46 2001
@@ -0,0 +1,40 @@
+Björn Gohla <b.gohla@gmx.de> 01.08.2001
+
+Configuring with
+
+	$ perl ./config.pl --plist
+
+enables property list output in cmafihe, the lojban glosser. Thus 
+
+	$ cmafihe -p 
+
+will output the property list to the standard output.  Property lists are
+stringified representations of data structures like hash tables, arrays, or
+binary data very well integrated in gnustep (http://www.gnustep.org). The
+library libPropList (which can be retrieved from http://www.windowmaker.org) is
+a C API for those data structures.
+
+I have implemented and tested this only on my Linux box, so I have no idea
+whether it works on any other platform.
+
+The motivation for this feature is that I am working on a vocabulary learning
+tool (soon to be released) which is written in gnustep. I want to be able to
+take arbitrary Lojban text and extract all valid Lojban words, such that having
+learned the extracted vocabulary one is able to understand the text.  So I may
+someday be able to follow Lojban discussions on lojban@yahoogroups.com ;) .
+
+
+For the sake of authenticity here comes a little preview:
+
+	$  echo "mi na jimpe la'e di'u " |./cmafihe -p
+
+yields:
+
+	{
+		mi = "I, me";
+		na = not;
+		jimpe = understand;
+		"la'e" = "the referent of";
+		"di'u" = "last utterance";
+	}
+
diff -urN jbofihe-0.36/action.perl jbofihe-0.37/action.perl
--- jbofihe-0.36/action.perl	Mon Mar 26 22:03:04 2001
+++ jbofihe-0.37/action.perl	Wed Aug  8 22:41:46 2001
@@ -1,4 +1,4 @@
-# $Header: /cvs/src/jbofihe/action.perl,v 1.5 2001/01/14 22:42:26 richard Exp $
+# $Header: /cvs/src/jbofihe/action.perl,v 1.6 2001/05/09 22:06:55 richard Exp $
 #
 # Perl script to read through the bison grammar for lojban and insert
 # simple actions, typically to build a nonterminal node.
@@ -88,7 +88,7 @@
             if ($nchildren < 0) { $nchildren = 0; }
             $n_live_children = 0;
             for ($i=1; $i<=$nchildren; $i++) {
-                if ($children[$i] =~ /START_EK|START_GIHEK|START_GUHEK|START_JEK|START_JOIK|START_GEK|START_BAI|EK_KE|EK_BO|JEK_KE|JEK_BO|JOIK_KE|JOIK_BO|I_JEKJOIK|I_BO|GIHEK_KE|GIHEK_BO|NAhE_BO|NAhE_time|NAhE_space|NAhE_CAhA|NUMBER_MAI|NUMBER_MOI|NUMBER_ROI|EOF_MARK/) {
+                if ($children[$i] =~ /START_EK|START_GIHEK|START_GUHEK|START_JEK|START_JOIK|START_GEK|START_BAI|EK_KE|EK_BO|JEK_KE|JEK_BO|JOIK_KE|JOIK_BO|I_JEKJOIK|I_BO|GIHEK_KE|GIHEK_BO|NAhE_BO|NAhE_time|NAhE_space|NAhE_CAhA|NA_KU|NUMBER_MAI|NUMBER_MOI|NUMBER_ROI|EOF_MARK/) {
                     $flags[$i] = 0;
                 } else {
                     $flags[$i] = 1;
@@ -122,7 +122,7 @@
         if ($nchildren < 0) { $nchildren = 0; }
         $n_live_children = 0;
         for ($i=1; $i<=$nchildren; $i++) {
-            if ($children[$i] =~ /START_EK|START_GIHEK|START_GUHEK|START_JEK|START_JOIK|START_GEK|START_BAI|EK_KE|EK_BO|JEK_KE|JEK_BO|JOIK_KE|JOIK_BO|I_JEKJOIK|I_BO|GIHEK_KE|GIHEK_BO|NAhE_BO|NAhE_time|NAhE_space|NAhE_CAhA|NUMBER_MAI|NUMBER_MOI|NUMBER_ROI|EOF_MARK/) {
+            if ($children[$i] =~ /START_EK|START_GIHEK|START_GUHEK|START_JEK|START_JOIK|START_GEK|START_BAI|EK_KE|EK_BO|JEK_KE|JEK_BO|JOIK_KE|JOIK_BO|I_JEKJOIK|I_BO|GIHEK_KE|GIHEK_BO|NAhE_BO|NAhE_time|NAhE_space|NAhE_CAhA|NA_KU|NUMBER_MAI|NUMBER_MOI|NUMBER_ROI|EOF_MARK/) {
                 $flags[$i] = 0;
             } elsif ($children[$i] =~ m{\?([A-Zh]+)}) {
                 $flags[$i] = 2;
diff -urN jbofihe-0.36/bccheck.c jbofihe-0.37/bccheck.c
--- jbofihe-0.36/bccheck.c	Thu Jan  1 01:00:00 1970
+++ jbofihe-0.37/bccheck.c	Wed Aug  8 22:41:46 2001
@@ -0,0 +1,191 @@
+/***************************************
+  $Header: /cvs/src/jbofihe/bccheck.c,v 1.2 2001/07/24 21:22:23 richard Exp $
+
+  State machine to check for bad cmene.
+  ***************************************/
+
+/* Copyright (C) Richard P. Curnow  2001 */
+/*
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * 
+*/
+
+#if defined(TEST)
+#include <stdio.h>
+#endif
+
+#include "bccheck.h"
+
+enum tokens {/*{{{*/
+  TA, TE, TI, TO, TU, TY, TD, TL,
+  TOC, TAP, TCM, TXX
+};
+/*}}}*/
+enum result {/*{{{*/
+  BC_NOT_COMPLETE,
+  BC_CMENE_OK,
+  BC_CMENE_BAD_SPLIT,
+  BC_CMENE_BAD_NOSPLIT
+};
+/*}}}*/
+enum attribute {/*{{{*/
+  ATTR_NONE,
+  ATTR_SEEN_LD
+};
+/*}}}*/
+
+/* Include tables defining DFA */
+#include "bctables.c"
+
+static unsigned char mapchar[256] =/*{{{*/
+/* Map ASCII set to the tokens. */
+{
+  TXX , TXX , TXX , TXX , TXX , TXX , TXX , TXX ,
+  TXX , TXX , TXX , TXX , TXX , TXX , TXX , TXX ,
+  TXX , TXX , TXX , TXX , TXX , TXX , TXX , TXX ,
+  TXX , TXX , TXX , TXX , TXX , TXX , TXX , TXX ,
+
+  TXX , TXX , TXX , TXX , TXX , TXX , TXX , TAP ,
+  TXX , TXX , TXX , TXX , TCM , TXX , TXX , TXX ,
+  TXX , TXX , TXX , TXX , TXX , TXX , TXX , TXX ,
+  TXX , TXX , TXX , TXX , TXX , TXX , TXX , TXX ,
+
+  TXX , TA  , TOC , TOC , TD  , TE  , TOC , TOC ,
+  TOC , TI  , TOC , TOC , TL  , TOC , TOC , TO  ,
+  TOC , TOC , TOC , TOC , TOC , TU  , TOC , TOC ,
+  TOC , TY  , TOC , TXX , TXX , TXX , TXX , TXX ,
+
+  TXX , TA  , TOC , TOC , TD  , TE  , TOC , TOC ,
+  TOC , TI  , TOC , TOC , TL  , TOC , TOC , TO  ,
+  TOC , TOC , TOC , TOC , TOC , TU  , TOC , TOC ,
+  TOC , TY  , TOC , TXX , TXX , TXX , TXX , TXX ,
+
+  TXX , TXX , TXX , TXX , TXX , TXX , TXX , TXX ,
+  TXX , TXX , TXX , TXX , TXX , TXX , TXX , TXX ,
+  TXX , TXX , TXX , TXX , TXX , TXX , TXX , TXX ,
+  TXX , TXX , TXX , TXX , TXX , TXX , TXX , TXX ,
+
+  TXX , TXX , TXX , TXX , TXX , TXX , TXX , TXX ,
+  TXX , TXX , TXX , TXX , TXX , TXX , TXX , TXX ,
+  TXX , TXX , TXX , TXX , TXX , TXX , TXX , TXX ,
+  TXX , TXX , TXX , TXX , TXX , TXX , TXX , TXX ,
+
+  TXX , TXX , TXX , TXX , TXX , TXX , TXX , TXX ,
+  TXX , TXX , TXX , TXX , TXX , TXX , TXX , TXX ,
+  TXX , TXX , TXX , TXX , TXX , TXX , TXX , TXX ,
+  TXX , TXX , TXX , TXX , TXX , TXX , TXX , TXX ,
+
+  TXX , TXX , TXX , TXX , TXX , TXX , TXX , TXX ,
+  TXX , TXX , TXX , TXX , TXX , TXX , TXX , TXX ,
+  TXX , TXX , TXX , TXX , TXX , TXX , TXX , TXX ,
+  TXX , TXX , TXX , TXX , TXX , TXX , TXX , TXX 
+};
+
+/*}}}*/
+/* Run the bad-cmene DFA over 'word': returns 1 if the cmene is bad, 0 if OK.
+   *split (if non-NULL) is always set; *ladoi and *tail (if non-NULL) are only
+   written when the DFA takes an early BAD_SPLIT/BAD_NOSPLIT exit. */
+int is_bad_cmene(char *word, int *split, char **ladoi, char **tail)/*{{{*/
+{
+  char *p;
+  char *ladoi_1 = word; /* defined fallback until an ATTR_SEEN_LD state is seen */
+  int cs = 0, ns = 0;   /* ns must be valid even if the loop body never runs */
+  enum result res;
+
+  for (p=word; *p; p++) {
+    ns = bad_cmene_next_state(cs, mapchar[(unsigned int) *p & 0xff]);
+    res = bad_cmene_exitval[ns];
+    /* Deal with early exit conditions */
+    if (res == BC_CMENE_BAD_NOSPLIT || res == BC_CMENE_BAD_SPLIT) {
+      if (ladoi) *ladoi = ladoi_1;
+      if (tail) *tail = p;
+      break;
+    }
+
+    /* By doing this here, we protect against problem words like "salad",
+       where the position of "d" would need to be remembered in case it starts
+       "doi", but that would overwrite the stored position of "l" */
+    if (bad_cmene_attribute[ns] == ATTR_SEEN_LD) ladoi_1 = p;
+    cs = ns;
+  }
+
+  /* ns is the last state reached, or the start state (0) for an empty word */
+  res = bad_cmene_exitval[ns];
+  if (split) *split = 0;
+
+  switch (res) {
+    case BC_CMENE_OK:
+      return 0;
+    case BC_CMENE_BAD_SPLIT:
+      if (split) *split = 1;
+      return 1;
+    case BC_CMENE_BAD_NOSPLIT:
+    case BC_NOT_COMPLETE:
+      return 1;
+  }
+  return 0; /* make compiler happy */
+}/*}}}*/
+
+#if defined(TEST)
+static void do_check(char *s)/*{{{*/
+{
+  int res;
+  char *ladoi, *tail;
+  int split;
+  char *p;
+  /* Print verdict for s; if it splits, echo s with '+' at ladoi and tail */
+  res = is_bad_cmene(s, &split, &ladoi, &tail);
+  printf("%s : %s", s, res ? "BAD" : "good");
+  if (res) {
+    if (split) {
+      printf(" : ");
+      for (p=s; *p; p++) {
+        if (p == ladoi) putchar('+');
+        if (p == tail)  putchar('+');
+        putchar(*p);
+      }
+    }
+  }
+  printf("\n");
+  return;
+}
+/*}}}*/
+int main (int argc, char **argv)/*{{{*/
+{
+  /* Self-test: run the checker over a fixed list of sample cmene */
+  do_check("laplas");
+  do_check("stalaplas");
+  do_check("stala'iplas");
+  do_check("stala,iplas");
+  do_check("stala'ip,las");
+  do_check("laplus");
+  do_check("alaun");
+  do_check("ritcrd");
+  do_check("ritc,r,d");
+  do_check("mecylakorunas");
+  do_check("mecylaukorunas");
+  do_check("mecyla'ikorunas");
+  do_check("mecyla,ikorunas");
+  do_check("mecyda,ikorunas");
+
+  return 0;
+}/*}}}*/
+#endif /* defined(TEST) */
+
+         
+
+
+
+
diff -urN jbofihe-0.36/bccheck.h jbofihe-0.37/bccheck.h
--- jbofihe-0.36/bccheck.h	Thu Jan  1 01:00:00 1970
+++ jbofihe-0.37/bccheck.h	Wed Aug  8 22:41:46 2001
@@ -0,0 +1,36 @@
+/***************************************
+  $Header: /cvs/src/jbofihe/bccheck.h,v 1.1 2001/06/18 22:13:14 richard Exp $
+
+  Header file for bad cmene checking function
+  
+  ***************************************/
+
+/* Copyright (C) Richard P. Curnow  2001 */
+/*
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * 
+*/
+
+#ifndef BCCHECK_H
+#define BCCHECK_H
+
+/* Return 1 if a proposed cmene in 'word' is bad.  'split' is 1 if the word
+ * breaks up (as opposed to just being bad but not breaking).  'ladoi' gives
+ * the position of the la/lai/doi, and 'tail' gives the position of the
+ * remainder of the word (both only written when la/lai/doi was detected). */
+extern int is_bad_cmene(char *word, int *split, char **ladoi, char **tail);
+
+#endif /* BCCHECK_H */
+
diff -urN jbofihe-0.36/bctables.in jbofihe-0.37/bctables.in
--- jbofihe-0.36/bctables.in	Thu Jan  1 01:00:00 1970
+++ jbofihe-0.37/bctables.in	Wed Aug  8 22:41:46 2001
@@ -0,0 +1,75 @@
+Tokens A E I O U Y D L OCONS APOS COMMA
+# OCONS = consonant other than D or L
+
+Prefix bad_cmene
+
+Abbrev VOWEL = A|E|I|O|U
+Abbrev CONS  = D|L|OCONS
+Abbrev ANY   = VOWEL|Y|CONS|APOS|COMMA
+
+Block LA_DOI
+# Detect LA/LAI/DOI
+    State in
+        L -> l
+        D -> d
+
+    State l (ATTR_SEEN_LD)
+        A ;     APOS|COMMA|CONS -> blocked
+        A ; I ; APOS|COMMA|CONS -> blocked
+
+    State d (ATTR_SEEN_LD)
+        O ; I ; APOS|COMMA|CONS -> blocked
+
+    State blocked
+
+EndBlock
+
+Block MAIN
+
+    split : LA_DOI
+    nosplit : LA_DOI
+
+    State split.blocked   = TAG_SPLIT
+    State nosplit.blocked = TAG_NOSPLIT
+
+    State v
+        VOWEL|Y -> v
+        APOS|COMMA -> a
+        CONS -> c
+        -> split.in
+
+    State a
+        VOWEL|Y -> v
+
+    State c
+        CONS -> c
+        COMMA ; CONS -> c
+        Y -> y
+        VOWEL -> v
+        = TAG_OK
+
+    State y
+        APOS -> a
+        COMMA ; VOWEL|Y -> v
+        CONS -> c
+        -> nosplit.in
+
+    State in
+        CONS -> c
+        Y -> y
+        VOWEL -> v
+        -> split.in
+
+EndBlock
+
+EarlyResult TAG_SPLIT -> BC_CMENE_BAD_SPLIT
+EarlyResult TAG_NOSPLIT -> BC_CMENE_BAD_NOSPLIT
+Result TAG_OK & ~(TAG_SPLIT | TAG_NOSPLIT) -> BC_CMENE_OK
+DefResult BC_NOT_COMPLETE
+
+Attr Result ATTR_SEEN_LD
+Attr DefResult ATTR_NONE
+
+Type "enum result"
+Attr Type "enum attribute"
+
diff -urN jbofihe-0.36/build_kit jbofihe-0.37/build_kit
--- jbofihe-0.36/build_kit	Mon Mar 26 22:03:04 2001
+++ jbofihe-0.37/build_kit	Thu Jan  1 01:00:00 1970
@@ -1,88 +0,0 @@
-#!/usr/local/bin/perl
-
-# $Header: /cvs/src/jbofihe/build_kit,v 1.12 2001/03/21 23:05:15 richard Exp $
-
-# Copyright (C) Richard P. Curnow  1998-2001
-# 
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of version 2 of the GNU General Public License as
-# published by the Free Software Foundation.
-# 
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
-# 
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
-# 
-
-# Script used to 'groom' the distribution for release after doing the 'cvs export'.
-# (Although not really needed in the distribution, this is bundled for
-# completeness and for 'disaster-recovery' purposes.)
-
-# This is set here rather than computed as runtime so that it is
-# version-controlled.  Otherwise, there is no way to guarantee being able to
-# recreate an old version if the year has incremented since the kit was last
-# built
-
-$year = 2001;
-
-@files = qx|find . -type f -print|;
-open(COPY, "<LICINS");
-@copy=<COPY>;
-close(COPY);
-for $f (@files) {
-    chomp $f;
-    $done = 0;
-    print "doing $f\n";
-    rename "$f","$f.tmp";
-    open (IN, "<$f.tmp");
-    open (OUT, ">$f");
-    while (<IN>) {
-	if (!$done && m,\/\* COPYRIGHT \*\/,) {
-        # When the copyright string is not locally in the source file.  This is the default
-        # for most of the parts Richard wrote.
-	    print OUT "/**********************************************************************\n";
-        print OUT " * Copyright (C) Richard P. Curnow  1998-$year\n";
-	    for (@copy) {
-            print OUT " * ".$_;
-	    }
-	    print OUT " *********************************************************************/\n";
-        $done = 1;
-	} elsif (!$done && m,\# COPYRIGHT,) {
-	    print OUT "#\n";
-        print OUT "# Copyright (C) Richard P. Curnow  1998-$year\n";
-	    for (@copy) {
-            print OUT "# ".$_;
-	    }
-	    print OUT "#\n";
-        $done = 1;
-	} elsif (!$done && m,\/\* LICEN[CS]E \*\/,) {
-        # For the case where the copyright strings are in the file itself.  This may be
-        # the case with multiple contributors.
-        print OUT "/*\n";
-	    for (@copy) {
-            print OUT " * ".$_;
-	    }
-        print OUT "*/\n";
-        $done = 1;
-	} elsif (!$done && m,\# LICEN[CS]E,) {
-        # For the case where the copyright strings are in the file itself.  This may be
-        # the case with multiple contributors.
-	    for (@copy) {
-            print OUT "# ".$_;
-	    }
-        $done = 1;
-	} else {
-	    print OUT;
-	}
-    }
-    close(IN);
-    close(OUT);
-    unlink "$f.tmp";
-}
-
-chmod 0755, "configure";
-
diff -urN jbofihe-0.36/categ.c jbofihe-0.37/categ.c
--- jbofihe-0.36/categ.c	Mon Mar 26 22:03:04 2001
+++ jbofihe-0.37/categ.c	Wed Aug  8 22:41:46 2001
@@ -1,5 +1,5 @@
 /***************************************
-  $Header: /cvs/src/jbofihe/categ.c,v 1.12 2001/02/02 22:20:39 richard Exp $
+  $Header: /cvs/src/jbofihe/categ.c,v 1.13 2001/05/09 22:06:55 richard Exp $
 
   Look for particular constructions in the token sequence and insert
   markers before them.  This provides the workaround to the lojban
@@ -1196,6 +1196,38 @@
 
 }
 
+/*++++++++++++++++++++++++++++++
+  Insert a PRIVATE_NA_KU marker before each NA cmavo directly followed by KU
+  ++++++++++++++++++++++++++++++*/
+
+static void
+categorize_naku(TreeNode *head)
+{
+  TreeNode *x, *y, *marker;
+
+  for (x = head->next; x!=head; x=x->next) {
+    if (x->type == N_CMAVO &&
+        x->data.cmavo.selmao == NA) {
+      y = x->next;
+      if (y->type == N_CMAVO &&
+          y->data.cmavo.selmao == KU) {
+        /* The marker takes its source position from the NA token */
+        marker = new_node();
+        marker->type = N_MARKER;
+        marker->start_line = x->start_line;
+        marker->start_column = x->start_column;
+        marker->data.marker.tok = PRIVATE_NA_KU;
+        marker->data.marker.text = new_string("PRIVATE_NA_KU");
+        /* Link the marker into the prev/next chain just before x */
+        marker->prev = x->prev;
+        marker->next = x;
+        x->prev->next = marker;
+        x->prev = marker;
+      }
+    }
+  }  
+}
+
 /*++++++++++++++++++++++++++++++++++++++
   This function looks at particular types of token and makes them more
   specific depending on what comes further on in the token stream.
@@ -1221,5 +1253,6 @@
   categorize_number_moi(head);
   categorize_bai(head);
   categorize_nahe(head);
+  categorize_naku(head);
   mark_cmavo_before_free(head);
 }
diff -urN jbofihe-0.36/cm.h jbofihe-0.37/cm.h
--- jbofihe-0.36/cm.h	Mon Mar 26 22:03:04 2001
+++ jbofihe-0.37/cm.h	Wed Aug  8 22:41:46 2001
@@ -1,6 +1,6 @@
 /***************************************
 
-  $Id: cm.h,v 1.2 2000/05/03 22:06:04 richard Exp $
+  $Id: cm.h,v 1.3 2001/07/31 21:11:17 richard Exp $
 
   Header file for all local fns etc.
 
@@ -34,6 +34,10 @@
 #include <string.h>
 #include <ctype.h>
 
+#ifdef PLIST
+#include <proplist.h>
+#endif
+
 #define new_string(s) strcpy((char *) malloc(1+strlen(s)), (s))
 #define extend_string(s, x) strcat((char *) realloc(s, 1+strlen(s)+strlen(x)), x)
 #define new(T) (T *) malloc(sizeof(T))
@@ -45,7 +49,9 @@
   OF_LATEX,
   OF_TEXT,
   OF_TEXTBLK
-
+#ifdef PLIST
+  ,OF_PLIST
+#endif
 } OutputFormat;
 
 /*+ Flag indicating whether to generate latex blocked output instead
diff -urN jbofihe-0.36/cm_main.c jbofihe-0.37/cm_main.c
--- jbofihe-0.36/cm_main.c	Mon Mar 26 22:03:04 2001
+++ jbofihe-0.37/cm_main.c	Wed Aug  8 22:41:46 2001
@@ -1,11 +1,12 @@
 /***************************************
-  $Header: /cvs/src/jbofihe/cm_main.c,v 1.5 2000/10/14 07:18:20 richard Exp $
+  $Header: /cvs/src/jbofihe/cm_main.c,v 1.7 2001/07/31 21:18:11 richard Exp $
 
   Main routine for mini-translater
   ***************************************/
 
-/**********************************************************************
- * Copyright (C) Richard P. Curnow  1998-2001
+/* Copyright 1998-2001 Richard P. Curnow */
+/* Help options added by Björn Gohla */
+/*
  * 
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -20,12 +21,13 @@
  * with this program; if not, write to the Free Software Foundation, Inc.,
  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
  * 
- *********************************************************************/
+*/
 
 #include "cm.h"
 #include "version.h"
 
 extern FILE *yyin;
+extern int yylex(void);
 
 int
 yywrap(void)
@@ -47,11 +49,26 @@
       ofmt = OF_LATEX;
     } else if (!strcmp(*argv, "-b")) {
       ofmt = OF_TEXTBLK;
+#ifdef PLIST
+    } else if (!strcmp(*argv, "-p")) {
+      ofmt = OF_PLIST;
+#endif 
     } else if (!strcmp(*argv, "-w")) {
       ++argv, --argc;
       width = atoi(*argv);
     } else if (!strcmp(*argv, "-v")) {
       fprintf(stderr, "cmafihe version %s\n", version_string);
+      exit(0);
+    } else if ( !strcmp(*argv, "-h") || !strcmp(*argv, "--help") ) {
+      fprintf(stderr, "cmafihe, gloss lojban text without verifying\n"
+                      "usage: cmafihe [-b [-w WIDTH] | -p | -l | -v] [FILENAME]\n"
+                      "no options : output inline ascii\n"
+                      "-b         : output blocked ascii with optional WIDTH, default %i\n"
+                      "-l         : output blocked latex code\n"
+#ifdef PLIST
+                      "-p         : output GNUStep property list with vocabulary\n"
+#endif
+                      "-v         : version\n", width);
       exit(0);
     } else {
       filename = *argv;
diff -urN jbofihe-0.36/cm_output.c jbofihe-0.37/cm_output.c
--- jbofihe-0.36/cm_output.c	Mon Mar 26 22:03:04 2001
+++ jbofihe-0.37/cm_output.c	Wed Aug  8 22:41:46 2001
@@ -1,11 +1,13 @@
 /***************************************
-  $Header: /cvs/src/jbofihe/cm_output.c,v 1.2 2000/05/03 22:02:37 richard Exp $
+  $Header: /cvs/src/jbofihe/cm_output.c,v 1.3 2001/07/31 21:11:17 richard Exp $
 
   Output formatting functions
   ***************************************/
 
-/**********************************************************************
- * Copyright (C) Richard P. Curnow  1998-2001
+/* Copyright 1998-2001 Richard P. Curnow */
+/* Includes contributions from Björn Gohla to provide the GNUStep
+ * interface */
+/*
  * 
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -20,7 +22,7 @@
  * with this program; if not, write to the Free Software Foundation, Inc.,
  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
  * 
- *********************************************************************/
+*/
 
 #include "cm.h"
 
@@ -34,6 +36,10 @@
 
 #define BUFFER_SIZE 512
 
+#ifdef PLIST
+static proplist_t dictionary = NULL;
+#endif
+
 static char lines[3][BUFFER_SIZE];
 static int width_used;
 
@@ -120,6 +126,11 @@
       lines[0][0] = lines[1][0] = lines[2][0] = 0;
       width_used = 0;
       break;
+#ifdef PLIST
+  case OF_PLIST:
+      dictionary = PLMakeDictionaryFromEntries(NULL, NULL, NULL);
+      break;
+#endif
   }
 
 }
@@ -138,6 +149,16 @@
     case OF_TEXTBLK:
       block_newline();
       break;
+#ifdef PLIST
+    case OF_PLIST:
+      /* Dump the accumulated dictionary as property-list text. */
+      /* we could save the dictionary to a named file  */
+      /* 	dictionary = PLSetFilename(dictionary, PLMakeString("output.plist")); */
+      /* 	PLSave(dictionary, NO); */
+      /* but instead for now we just print to stdout ("%s": text may hold '%') */
+      printf("%s", PLGetDescription(dictionary));
+      break;
+#endif /* PLIST */
   }
 }
 
@@ -146,22 +167,27 @@
 void
 output(const char *lojban, const char *trans, const char *selmao)
 {
-  switch (ofmt) {
+  switch (ofmt) { 
     case OF_LATEX:
-    printf ("\\begin{tabular}[t]{l}"
-            "\\textbf{\\footnotesize %s}\\\\\n"
-            "\\textrm{\\footnotesize %s}\\\\\n"
-            "\\textit{\\footnotesize %s}\n"
-            "\\end{tabular}\n"
-            "\\rule{0in}{1.0\\baselineskip}",
-            lojban, selmao, trans);
+      printf ("\\begin{tabular}[t]{l}"
+          "\\textbf{\\footnotesize %s}\\\\\n"
+          "\\textrm{\\footnotesize %s}\\\\\n"
+          "\\textit{\\footnotesize %s}\n"
+          "\\end{tabular}\n"
+          "\\rule{0in}{1.0\\baselineskip}",
+          lojban, selmao, trans);
       break;
     case OF_TEXT:
-    printf ("%s <%s> [%s] ", lojban, selmao, trans);
+      printf ("%s <%s> [%s] ", lojban, selmao, trans);
       break;
     case OF_TEXTBLK:
       do_block(lojban, selmao, trans);
       break;
+#ifdef PLIST
+    case OF_PLIST:      
+      dictionary = PLInsertDictionaryEntry(dictionary, PLMakeString(lojban), PLMakeString(trans));
+      break;
+#endif //PLIST
   }
 }
 /* ================================================== */
@@ -190,10 +216,10 @@
 {
   switch (ofmt) {
     case OF_LATEX:
-    printf ("\\textrm{\\footnotesize %s}", text);
+      printf ("\\textrm{\\footnotesize %s}", text);
       break;
     case OF_TEXT:
-    printf ("(%s) ", text);
+      printf ("(%s) ", text);
       break;
     case OF_TEXTBLK:
       do_block("(", "(", "(");
diff -urN jbofihe-0.36/cmafihe.1 jbofihe-0.37/cmafihe.1
--- jbofihe-0.36/cmafihe.1	Mon Mar 26 22:03:04 2001
+++ jbofihe-0.37/cmafihe.1	Wed Aug  8 22:41:46 2001
@@ -3,7 +3,7 @@
 cmafihe \- Lojban word glosser
 .SH SYNOPSIS
 .B cmafihe
-[ -l ] [ -b ] [ -w width ] [ -v ] [filename]
+[ -l ] [ -b ] [ -w width ] [ -p ] [ -v ] [filename]
 .SH DESCRIPTION
 .I cmafihe
 is a program that reads a piece of Lojban text.  It writes out the
@@ -19,6 +19,10 @@
 Produce output for typesetting with LaTeX.  The Lojban, selma'o and
 English translation are shown on separate lines for each word (or
 closely related group of words).
+.TP
+.B -p
+Output GNUStep property list containing vocabulary.  (Only available if cmafihe
+was compiled with the PLIST option).
 .TP
 .B -b
 Produce plain text output in 'block' format.
diff -urN jbofihe-0.36/cmavotab.c jbofihe-0.37/cmavotab.c
--- jbofihe-0.36/cmavotab.c	Mon Mar 26 22:03:05 2001
+++ jbofihe-0.37/cmavotab.c	Wed Aug  8 22:41:46 2001
@@ -1,5 +1,5 @@
 /***************************************
-  $Header: /cvs/src/jbofihe/cmavotab.c,v 1.6 2000/10/14 06:58:09 richard Exp $
+  $Header: /cvs/src/jbofihe/cmavotab.c,v 1.7 2001/08/08 21:33:07 richard Exp $
 
   Table of all cmavo, indexed by hash function computed in lex1.c.
   Most of this file was autogenerated by a perl script.
@@ -1621,7 +1621,7 @@
   { ""      , UNKNOWN, CM_UNKNOWN , SSM_UNKNOWN , ""                            }, /* 1590 */
   { ""      , UNKNOWN, CM_UNKNOWN , SSM_UNKNOWN , ""                            }, /* 1591 */
   { ""      , UNKNOWN, CM_UNKNOWN , SSM_UNKNOWN , ""                            }, /* 1592 */
-  { ""      , UNKNOWN, CM_UNKNOWN , SSM_UNKNOWN , ""                            }, /* 1593 */
+  { "xa\'o" , ZAhO   , CM_XAhO    , SSM_ZAhO    , "infrafective"                }, /* 1593 */
   { ""      , UNKNOWN, CM_UNKNOWN , SSM_UNKNOWN , ""                            }, /* 1594 */
   { ""      , UNKNOWN, CM_UNKNOWN , SSM_UNKNOWN , ""                            }, /* 1595 */
   { "xe"    , SE     , CM_XE      , SSM_SE      , "5th conversion"              }, /* 1596 */
diff -urN jbofihe-0.36/cmavotab.h jbofihe-0.37/cmavotab.h
--- jbofihe-0.36/cmavotab.h	Mon Mar 26 22:03:05 2001
+++ jbofihe-0.37/cmavotab.h	Wed Aug  8 22:41:47 2001
@@ -1,5 +1,5 @@
 /***************************************
-  $Header: /cvs/src/jbofihe/cmavotab.h,v 1.3 2000/06/15 20:50:18 richard Exp $
+  $Header: /cvs/src/jbofihe/cmavotab.h,v 1.4 2001/08/08 21:33:08 richard Exp $
 
   Definition of the record type used for the cmavo table.
   ***************************************/
@@ -588,6 +588,7 @@
   CM_VUhU = 1552,
   CM_VY = 1554,
   CM_XA = 1582,
+  CM_XAhO = 1593,
   CM_XE = 1596,
   CM_XI = 1610,
   CM_XO = 1624,
diff -urN jbofihe-0.36/config.pl jbofihe-0.37/config.pl
--- jbofihe-0.36/config.pl	Mon Mar 26 22:03:05 2001
+++ jbofihe-0.37/config.pl	Wed Aug  8 22:41:47 2001
@@ -1,4 +1,4 @@
-# $Header: /cvs/src/jbofihe/config.pl,v 1.18 2001/03/21 23:04:38 richard Exp $
+# $Header: /cvs/src/jbofihe/config.pl,v 1.20 2001/08/03 20:39:54 richard Exp $
 #
 # To be run with perl.  This builds Makefile from Makefile.in,
 # substituting things whose location varies on different systems, and
@@ -29,7 +29,9 @@
 $debug=0;
 $profile=0;
 $mmap=1;
+$plist=0;
 $embed=0;
+$defines="";
 
 while ($_ = shift @ARGV) {
     if (/^--help/ || /^-h/) {
@@ -41,6 +43,8 @@
         $prefix = shift @ARGV;
     } elsif (/^--profile$/) {
 		$profile = 1;
+    } elsif (/^--plist$/) {
+        $plist = 1;
     } elsif (/^--debug$/) {
 		$debug = 1;
 	} elsif (/^--nommap$/) {
@@ -50,13 +54,23 @@
     }
 }
 
-$optdebug = $debug ? "-g -Wall" : "-O2";
+
+if ($plist) {
+    $defines .= " -DPLIST";
+    $cmafihe_ldopts=" -lPropList";
+} else {
+    $cmafihe_ldopts="";
+}
+
+$optdebug = $debug ? " -g -Wall" : " -O2";
 if ($profile) {
     $optdebug .= " -pg";
 }
-$mmap_flag = $mmap ? "-DHAVE_MMAP=1" : "";
-$defines = $mmap_flag;
+$mmap_flag = $mmap ? " -DHAVE_MMAP=1" : "";
+$defines .= $mmap_flag;
 if ($debug) {
+
+
     $defines .= " -DEXPOSE_SIGNALS";
 }
 
@@ -70,9 +84,11 @@
 
 open(IN, "<Makefile.in");
 open(OUT, ">Makefile");
-while (<IN>) {
+while (<IN>) 
+{
     s/\@\@PREFIX\@\@/$prefix/eg;
-	s/\@\@OPTDEBUG\@\@/$optdebug/eg;
+    s/\@\@OPTDEBUG\@\@/$optdebug/eg;
+    s/\@\@CMAFIHE_LDOPTS\@\@/$cmafihe_ldopts/eg;
     s/\@\@DEFINES\@\@/$defines/eg;
     s/\@\@DICTDATA_C\@\@/$dictdata_c/eg;
     print OUT;
@@ -92,6 +108,7 @@
 --debug              Build a debuggable version
 --nommap             Don't use mmap for reading the dictionary file
 --embed              Embed minimal dictionary directly into jbofihe program
+--plist              Enable PropList output of vocabulary in cmafihe (requires libPropList from http://www.windowmaker.org) 
 EOF
 }
 
diff -urN jbofihe-0.36/contrib/emacs_functions jbofihe-0.37/contrib/emacs_functions
--- jbofihe-0.36/contrib/emacs_functions	Thu Jan  1 01:00:00 1970
+++ jbofihe-0.37/contrib/emacs_functions	Wed Aug  8 22:41:51 2001
@@ -0,0 +1,19 @@
+;;bjoern gohla <b.gohla@gmx.de> 31.03.2001
+;;attach this to your emacs init file     
+;;you will probably want to adjust program path and options
+(defun lojban-gloss () ""                                  
+  (interactive "_")                                        
+  (shell-command-on-region (region-beginning) (region-end)                                        
+"~/lojban/jbofihe-0.35/cmafihe -b")                                        
+)
+
+(global-set-key "\C-x=" 'lojban-gloss)
+
+(defun lojban-parse () ""
+  (interactive "_")      
+  (shell-command-on-region (region-beginning) (region-end)      
+"~/lojban/jbofihe-0.35/jbofihe")      
+)
+
+(global-set-key "\C-x-" 'lojban-parse)
+
diff -urN jbofihe-0.36/dfasyn/Makefile jbofihe-0.37/dfasyn/Makefile
--- jbofihe-0.36/dfasyn/Makefile	Thu Jan  1 01:00:00 1970
+++ jbofihe-0.37/dfasyn/Makefile	Wed Aug  8 22:41:51 2001
@@ -0,0 +1,47 @@
+# $Header: /cvs/src/jbofihe/dfasyn/Makefile,v 1.1 2001/07/12 21:15:35 richard Exp $
+#
+# Makefile for NFA->DFA conversion utility
+#
+# Copyright (C) Richard P. Curnow  2000-2001
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+# 
+#
+
+CC=gcc
+#CFLAGS=-g
+#CFLAGS=-O2 -pg
+CFLAGS=-O2
+OBJ = parse.o scan.o n2d.o expr.o tabcompr.o compdfa.o
+
+all : dfasyn
+
+dfasyn : $(OBJ)
+	$(CC) $(CFLAGS) -o dfasyn $(OBJ)
+
+parse.c parse.h : parse.y
+	bison -v -d parse.y
+	mv parse.tab.c parse.c
+	mv parse.tab.h parse.h
+
+parse.o : parse.c n2d.h
+
+scan.c : scan.l
+	flex -t -s scan.l > scan.c
+
+scan.o : scan.c parse.h n2d.h
+
+.PHONY: all clean
+clean:
+	rm -f dfasyn *.o scan.c parse.c parse.h parse.output
diff -urN jbofihe-0.36/dfasyn/compdfa.c jbofihe-0.37/dfasyn/compdfa.c
--- jbofihe-0.36/dfasyn/compdfa.c	Thu Jan  1 01:00:00 1970
+++ jbofihe-0.37/dfasyn/compdfa.c	Wed Aug  8 22:41:51 2001
@@ -0,0 +1,348 @@
+/***************************************
+  $Header: /cvs/src/jbofihe/dfasyn/compdfa.c,v 1.1 2001/07/12 21:15:35 richard Exp $
+
+  Routines for compressing the DFA by commoning-up equivalent states
+  ***************************************/
+
+/* Copyright (C) Richard P. Curnow  2001 */
+/*
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * 
+*/
+
+/*
+  The input to this stage is the 'raw' DFA built from the NFA by the subset
+  construction.  Depending on the style of the NFA, there may be large chunks
+  of the DFA that have equivalent functionality, in terms of resulting in the
+  same exit value for the same sequence of input tokens, but which are reached
+  by different prefixes.  The idea of this stage is to common up such regions,
+  to reduce the size of the DFA and hence the table sizes that are generated.
+
+  Conceptually, the basis of the algorithm is to assign the DFA states to
+  equivalence classes.  If there are N different exit values, there are
+  initially N+1 classes.  All states that can exit with a particular value are
+  placed in a class together, and all non-accepting states are placed together.
+  Now, a pass is made over all pairs of states.  Two states remain equivalent
+  if for each token, their outbound transitions go to states in the same class.
+  If the states do not stay equivalent, the class they were in is split
+  accordingly.  This is repeated again and again until no more bisections
+  occur.
+
+  The algorithm actually used is to assign an ordering to the states based on
+  their current class and outbound transitions.  The states are then sorted.
+  This allows all checking to be done on near-neighbours in the sequence
+  generated by the sort, which brings the execution time down to something
+  finite. 
+
+  */
+
+#include "n2d.h"
+
+static int last_eq_class; /* Next class to assign */
+static int Nt; /* Number of tokens; has to be made static to be visible to comparison fn. */
+
+/* To give 'general_compare' visibility of the current equiv. classes of the
+   destination states */
+static DFANode **local_dfas;
+
+static void calculate_signatures(DFANode **seq, DFANode **dfas, int ndfas)/*{{{*/
+/**** Determine state signatures based on transitions and current classes. ****/
+{
+  unsigned long sig;
+  int i, t;
+
+  for (i=0; i<ndfas; i++) {
+    DFANode *s = seq[i];
+    sig = 0UL;
+    for (t=0; t<Nt; t++) {
+      int di = s->map[t];
+      if (di >= 0) {
+        DFANode *d = dfas[di];
+        int deq_class = d->eq_class;
+      
+        sig = increment(sig, deq_class & 0xf); /* 16 bit pairs in sig */
+      }
+    }
+
+    s->signature = sig;
+  }
+}
+/*}}}*/
+static int general_compare(const void *a, const void *b)/*{{{*/
+/************************* Do full compare on states *************************/
+{
+  Castderef (a, const DFANode *, aa);
+  Castderef (b, const DFANode *, bb);
+
+  if (aa->eq_class < bb->eq_class) {
+    return -1;
+  } else if (aa->eq_class > bb->eq_class) {
+    return +1;
+  } else if (aa->signature < bb->signature) {
+    return -1;
+  } else if (aa->signature > bb->signature) {
+    return +1;
+  } else {
+    /* The hard way... */
+    int i;
+    for (i=0; i<Nt; i++) {
+      int am = aa->map[i];
+      int bm = bb->map[i];
+      
+      /* Map transition destinations to the current equivalence class of the
+         destination state (otherwise compressor is very pessimistic). */
+      am = (am>=0) ? local_dfas[am]->eq_class: -1;
+      bm = (bm>=0) ? local_dfas[bm]->eq_class: -1;
+
+      if      (am < bm) return -1;
+      else if (am > bm) return +1;
+    }
+
+  }
+
+  /* If you get here, the states are still equivalent */
+  return 0;
+
+}
+/*}}}*/
+static int split_classes(DFANode **seq, DFANode **dfas, int ndfas)/*{{{*/
+/*********************** Do one pass of class splitting ***********************/
+{
+  int i;
+  int had_to_split = 0; /* Return value : set if any class was split in this pass */
+  /* Sort so that states of one class with the same transition pattern are adjacent */
+  calculate_signatures(seq, dfas, ndfas);
+  qsort(seq, ndfas, sizeof(DFANode *), general_compare);
+  /* Results go into new_eq_class; eq_class stays fixed in the pass as general_compare reads it */
+  seq[0]->new_eq_class = seq[0]->eq_class;
+  
+  for (i=1; i<ndfas; i++) {
+    seq[i]->new_eq_class = seq[i]->eq_class;
+
+    if (seq[i]->eq_class == seq[i-1]->eq_class) {
+      /* May need to split, otherwise states were previously separated anyway
+         */
+      
+      if (general_compare(seq+i, seq+i-1) != 0) {
+        /* Different transition pattern, split existing equivalent class */
+        had_to_split = 1;
+        seq[i]->new_eq_class = ++last_eq_class;
+        if (verbose) fprintf(stderr, "Found %d equivalence classes\r", last_eq_class+1);
+      } else {
+        /* This works even if seq[i-1] was assigned a new class due to
+           splitting from seq[i-2] etc. */
+        seq[i]->new_eq_class = seq[i-1]->new_eq_class;
+      }
+    }
+  }
+
+  /* Set classes to new class values. */
+  for (i=0; i<ndfas; i++) {
+    seq[i]->eq_class = seq[i]->new_eq_class;
+  }
+
+  return had_to_split;
+
+}
+/*}}}*/
+static int initial_compare(const void *a, const void *b)/*{{{*/
+/************************** Sort based on exit value **************************/
+{
+  Castderef (a, const DFANode *, aa);
+  Castderef (b, const DFANode *, bb);
+  int status;
+  int aok, bok;
+
+  if (!aa->result && bb->result) {
+    /* Put all non-accepting states first in sort order */
+    return -1;
+  } else if (aa->result && !bb->result) {
+    return +1;
+  } else if (!aa->result && !bb->result) {
+    return 0;
+  } else {
+    status = strcmp(aa->result, bb->result);
+    if      (status < 0) return -1;
+    else if (status > 0) return +1;
+    else {
+      aok = (aa->attribute != 0);
+      bok = (bb->attribute != 0);
+      if (!aok && bok) {
+        return -1;
+      } else if (aok && !bok) {
+        return +1;
+      } else if (!aok && !bok)  {
+        return 0;
+      } else {
+        return strcmp(aa->attribute, bb->attribute);
+      }
+    }
+  }
+}
+/*}}}*/
+static void assign_initial_classes(DFANode **seq, int ndfas)/*{{{*/
+/******************* Determine initial equivalence classes. *******************/
+{
+  int i;
+  qsort(seq, ndfas, sizeof(DFANode *), initial_compare);
+  
+  last_eq_class = 0;
+  
+  seq[0]->eq_class = last_eq_class;
+
+  for (i=1; i<ndfas; i++) {
+    if (initial_compare(seq+i-1, seq+i) != 0) {
+      /* Not same as previous entry, assign a new class */
+      seq[i]->eq_class = ++last_eq_class;
+    } else {
+      /* Same class as last entry */
+      seq[i]->eq_class = last_eq_class;
+    }
+  }
+}
+/*}}}*/
+static int compress_states(DFANode **dfas, int ndfas)/*{{{*/
+/***** Compress the DFA so there is precisely one state in each eq. class *****/
+{
+  int *reps;
+  int i, j, t;
+  int neqc;
+  int new_index;
+
+  neqc = 1 + last_eq_class;
+
+  /* Array containing which state is the representative of each eq. class.
+     Keep the state which had the lowest array index. */
+  reps = new_array(int, neqc);
+  
+  for (i=0; i<neqc; i++) reps[i] = -1; /* undefined */
+
+  /* Go through DFA states to find the representative of each class. */
+  for (i=0; i<ndfas; i++) {
+    int eqc = dfas[i]->eq_class;
+    if (reps[eqc] < 0) {
+      reps[eqc] = i;
+      dfas[i]->is_rep = 1;
+    } else {
+      dfas[i]->is_rep = 0;
+    }
+  }
+
+  /* Go through DFA states and assign new indices. */
+  for (i=0, new_index=0; i<ndfas; i++) {
+    if (dfas[i]->is_rep) {
+      dfas[i]->new_index = new_index++;
+      if (report) fprintf(report, "Old DFA state %d becomes %d\n", i, dfas[i]->new_index);
+    } else {
+      int eqc = dfas[i]->eq_class;
+      int rep = reps[eqc];
+
+      /* This assignment works because the representative for the class
+         must have been done earlier in the loop. */
+      dfas[i]->new_index = dfas[rep]->new_index;
+
+      if (report) fprintf(report, "Old DFA state %d becomes %d (formerly %d)\n", i, dfas[i]->new_index, rep);
+    }
+  }
+  
+  /* Go through all transitions and fix them up. */
+  for (i=0; i<ndfas; i++) {
+    DFANode *s = dfas[i];
+    for (t=0; t<Nt; t++) {
+      int dest = s->map[t];
+      if (dest >= 0) {
+        s->map[t] = dfas[dest]->new_index;
+      }
+    }
+  }
+
+  /* Go through and crunch the entries in the DFA array, fixing up the indices */
+  for (i=j=0; i<ndfas; i++) {
+    if (dfas[i]->is_rep) {
+      dfas[j] = dfas[i];
+      dfas[j]->index = dfas[j]->new_index;
+      j++;
+    }
+  }
+
+  free(reps);
+  return neqc;
+}
+/*}}}*/
+static void discard_nfa_bitmaps(DFANode **dfas, int ndfas)/*{{{*/
+/********** Discard the (now inaccurate) NFA bitmaps from the states **********/
+{
+  int i;
+  for (i=0; i<ndfas; i++) {
+    free(dfas[i]->nfas);
+    dfas[i]->nfas = NULL;
+  }
+  return;
+}
+/*}}}*/
+static void print_classes(DFANode **dfas, int ndfas)/*{{{*/
+{
+  int i;
+#if 1
+  /* Comment out to print this stuff for debug */
+  return;
+#endif
+  if (!report) return;
+  fprintf(report, "Equivalence classes are :\n");
+  for (i=0; i<ndfas; i++) {
+    fprintf(report, "State %d class %d\n", i, dfas[i]->eq_class);
+  }
+  fprintf(report, "\n");
+  return;
+}
+/*}}}*/
+int compress_dfa(DFANode **dfas, int ndfas, int ntokens)/*{{{*/
+/************************ The main callable interface. ************************/
+/* Merge equivalent states of dfas[0..ndfas-1] in place (each state map has
+   ntokens entries).  Returns the number of states left at the front of dfas. */
+{
+  DFANode **seq; /* Storage for node sequence */
+  int i;
+  int had_to_split;
+  int new_ndfas;
+
+  /* Safety net : nothing to compress, so report zero states (was a bare 'return;') */
+  if (ndfas <= 0) return 0;
+
+  local_dfas = dfas;
+  Nt = ntokens;
+  
+  seq = new_array(DFANode *, ndfas);
+  for (i=0; i<ndfas; i++) {
+    seq[i] = dfas[i];
+  }
+
+  assign_initial_classes(seq, ndfas);
+
+  do {
+    print_classes(dfas, ndfas);
+    had_to_split = split_classes(seq, dfas, ndfas);
+  } while (had_to_split);
+
+  print_classes(dfas, ndfas);
+
+  new_ndfas = compress_states(dfas, ndfas);
+  discard_nfa_bitmaps(dfas, new_ndfas);
+
+  free(seq);
+  return new_ndfas;
+}
+/*}}}*/
+
diff -urN jbofihe-0.36/dfasyn/dfasyn.texi jbofihe-0.37/dfasyn/dfasyn.texi
--- jbofihe-0.36/dfasyn/dfasyn.texi	Thu Jan  1 01:00:00 1970
+++ jbofihe-0.37/dfasyn/dfasyn.texi	Wed Aug  8 22:41:51 2001
@@ -0,0 +1,85 @@
+@setfilename dfasyn.info
+@settitle User guide for the dfasyn DFA construction utility
+
+@titlepage
+@title dfasyn user guide
+@subtitle This manual describes how to use dfasyn.
+@author Richard P. Curnow
+@page
+@end titlepage
+
+@c{{{ Top node
+@node Top
+@top
+@menu
+* Introduction:: The introduction
+* Input file format:: A reference for the input file
+* Concept Index:: Index of concepts
+@end menu
+@c}}}
+@c{{{ ch:Introduction
+@node Introduction
+@chapter Introduction
+@menu
+* Uses for dfasyn:: The types of problem to which dfasyn is well-suited
+* Non-uses for dfasyn:: Cases where flex might be better
+* Why written:: Why was dfasyn written?
+@end menu
+@node Uses for dfasyn
+@section Uses for dfasyn
+dfasyn is particularly suited to the following types of scanning problem, which
+exceed flex's capabilities:
+
+@itemize @bullet
+@item When the pattern describing a token cannot be written as a regular
+expression.  For example, there may be iteration but with constraints between
+the end of one iteration and the start of the next.
+@item When more than 1 rule matches in a flex input file, flex chooses between
+them based on
+
+  @itemize -
+  @item Longest match first
+  @item Earliest rule in the file if more than 1 match of the same length exists
+  @end itemize
+
+dfasyn allows for a more general method of resolving multiple matches.
+Conceptually, it works out which rules match, giving a true/false status for
+each rule.  The input file defines an arbitrarily complex set of boolean
+expressions to reduce the multiple matches down to one unique one.  (If more than
+one of the boolean expressions evaluates true, this is an error.)
+
+@item When a customised method is required to construct the input tokens that
+pass to the scanner.  For example, if the tokens are the characters in a string
+(rather than coming from a file), or if some special logic has to be used to
+generate the tokens from the input character stream.
+  
+@item If you want to add actions to the scanning loop, e.g. to remember special
+locations within the word being scanned.
+  
+@end itemize
+
+@node Non-uses for dfasyn
+@section Cases where flex might be better
+
+In general, flex is easier and more convenient to use.  Where it is applicable
+to your problem, there are no obvious benefits to using dfasyn.
+
+@node Why written
+@section Why was dfasyn written?
+@c}}}
+
+@c{{{ ch:Input file format
+@node Input file format
+@chapter Input file format
+This section describes the format of the input file.
+
+@c}}}
+
+
+@node Concept Index
+@unnumbered Concept Index
+@printindex cp
+@bye
+
+@c vim:syntax=OFF:fdm=marker:fdc=4:cms=@c%s
+
diff -urN jbofihe-0.36/dfasyn/expr.c jbofihe-0.37/dfasyn/expr.c
--- jbofihe-0.36/dfasyn/expr.c	Thu Jan  1 01:00:00 1970
+++ jbofihe-0.37/dfasyn/expr.c	Wed Aug  8 22:41:51 2001
@@ -0,0 +1,391 @@
+/***************************************
+  $Header: /cvs/src/jbofihe/dfasyn/expr.c,v 1.1 2001/07/12 21:15:35 richard Exp $
+
+  Routines for merging and prioritising exit tags and attribute tags
+  ***************************************/
+
+/* Copyright (C) Richard P. Curnow  2001 */
+/*
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * 
+*/
+
+/* Handle boolean expressions used to determine the final scanner result from
+   the set of NFA accepting states that are simultaneously active at the end of
+   the scan.  */
+
+#include "n2d.h"
+
+enum ExprType {
+  E_AND, E_OR, E_XOR, E_COND, E_NOT, E_WILD, E_SYMBOL
+};
+
+struct Symbol;
+
+struct Expr {
+  enum ExprType type;
+  union {
+    struct { struct Expr *c1, *c2; } and; 
+    struct { struct Expr *c1, *c2; } or; 
+    struct { struct Expr *c1, *c2; } xor; 
+    struct { struct Expr *c1, *c2, *c3; } cond; 
+    struct { struct Expr *c1; } not; 
+    struct { int pad; } wild; 
+    struct { char *name; struct Symbol *s; } symbol;
+  } data;
+};
+
+struct Symbol {
+  char *name;
+  int is_expr;
+  union {
+    Expr *e;
+    int val;
+  } data;
+};
+
+struct SymbolList {
+  struct SymbolList *next;
+  struct Symbol *sym;
+};
+
+struct Result {
+  char *result; /* The string to write to the output file */
+  /* The boolean expression that defines whether the result is active */
+  Expr *e;
+  /* If != 0, assume the state machine that the program's output is embedded in
+     will exit immediately if this result occurs.  This may allow lots of
+     states to be culled from the DFA. */
+  int early; 
+};
+
+typedef struct Result Result;
+typedef struct Symbol Symbol;
+typedef struct SymbolList SymbolList;
+
+struct evaluator {
+  SymbolList *symbols;
+  Result *results;
+  int n_results;
+  int max_results;
+  /* Flag indicating whether any results evaluated so far have evaluated true.
+     (Used for implementing wildcard expression).  */
+  int any_results_so_far;
+  char *name;
+  char *defresult;
+  char *result_type;
+};
+
+/* Evaluator used to determine exit value of automaton, if the last input
+ * char appears in a particular state */
+Evaluator *exit_evaluator;
+
+/* Evaluator used to determine attribute to apply to a DFA state, given those
+ * that apply to its constituent NFA states. */
+Evaluator *attr_evaluator;
+
+Evaluator* create_evaluator(char *name)/*{{{*/
+{
+  Evaluator *x = new(struct evaluator);
+  x->symbols = NULL;
+  x->results = NULL;
+  x->n_results = x->max_results = 0;
+  x->any_results_so_far = 0;
+  x->name = new_string(name);
+  x->defresult = NULL;
+  x->result_type = NULL;
+  return x;
+}
+/*}}}*/
+void destroy_evaluator(Evaluator *x)/*{{{*/
+{
+  /* Just leak memory for now, no need to clean up. */
+  return;
+}
+/*}}}*/
+void define_defresult(Evaluator *x, char *text)/*{{{*/
+{
+  x->defresult = new_string(text);
+}
+/*}}}*/
+void define_type(Evaluator *x, char *text)/*{{{*/
+{
+  x->result_type = new_string(text); 
+}
+/*}}}*/
+char* get_defresult(Evaluator *x)/*{{{*/
+{
+  if (x->defresult) {
+    return x->defresult;
+  } else {
+    fprintf(stderr, "WARNING: Default %s used with no definition, \"0\" assumed\n", x->name);
+    return "0";
+  }
+}
+/*}}}*/
+char* get_result_type(Evaluator *x)/*{{{*/
+{
+  return x->result_type ? x->result_type : "short";
+}
+/*}}}*/
+static void add_new_symbol(Evaluator *x, Symbol *s)/*{{{*/
+{
+  SymbolList *nsl = new(SymbolList);
+  nsl->sym = s;
+  nsl->next = x->symbols;
+  x->symbols = nsl;
+}
+  /*}}}*/
+static void grow_results(Evaluator *x)/*{{{*/
+{
+  if (x->n_results == x->max_results) {
+    x->max_results += 32;
+    x->results = resize_array(Result, x->results, x->max_results);
+  }
+}
+/*}}}*/
+
+Expr * new_wild_expr(void)/*{{{*/
+{
+  Expr *r = new(Expr);
+  r->type = E_WILD;
+  return r; 
+}
+/*}}}*/
+Expr * new_not_expr(Expr *c)/*{{{*/
+{
+  Expr *r = new(Expr);
+  r->type = E_NOT;
+  r->data.not.c1 = c;
+  return r; 
+}
+/*}}}*/
+Expr * new_and_expr(Expr *c1, Expr *c2)/*{{{*/
+{
+  Expr *r = new(Expr);
+  r->type = E_AND;
+  r->data.and.c1 = c1;
+  r->data.and.c2 = c2;
+  return r; 
+}
+/*}}}*/
+Expr * new_or_expr(Expr *c1, Expr *c2)/*{{{*/
+{
+  Expr *r = new(Expr);
+  r->type = E_OR;
+  r->data.or.c1 = c1;
+  r->data.or.c2 = c2;
+  return r; 
+}
+/*}}}*/
+Expr * new_xor_expr(Expr *c1, Expr *c2)/*{{{*/
+{
+  Expr *r = new(Expr);
+  r->type = E_XOR;
+  r->data.xor.c1 = c1;
+  r->data.xor.c2 = c2;
+  return r; 
+}
+/*}}}*/
+Expr * new_cond_expr(Expr *c1, Expr *c2, Expr *c3)/*{{{*/
+{
+  Expr *r = new(Expr);
+  r->type = E_COND;
+  r->data.cond.c1 = c1;
+  r->data.cond.c2 = c2;
+  r->data.cond.c3 = c3;
+  return r; 
+}
+/*}}}*/
+static Symbol *  find_symbol_or_create(Evaluator *x, char *sym_name)/*{{{*/
+/* Return the symbol called sym_name in x's table.  If there is none, add a
+   new non-expression symbol with value 0 to the table and return that. */
+{
+  Symbol *s;
+  SymbolList *sl;
+
+  for (sl=x->symbols; sl; sl=sl->next) {
+    if (!strcmp(sl->sym->name, sym_name)) return sl->sym;
+  }
+  
+  s = new(Symbol);
+  add_new_symbol(x,s);
+  s->is_expr = 0; /* Until proven otherwise */
+  s->data.val = 0; /* Defined value even if created after clear_symbol_values() ran */
+  s->name = new_string(sym_name);
+  return s;
+}
+/*}}}*/
+
+Expr * new_sym_expr(char *sym_name)/*{{{*/
+/* Always create a new expr node referring to the symbol by name.  Don't bind to
+   actual symbol instance yet.  At the stage of parsing where this function is
+   used, we don't know yet which symbol table the symbol has to exist in.  */
+{
+  Expr *r;
+
+  r = new(Expr);
+  r->type = E_SYMBOL;
+  r->data.symbol.name = new_string(sym_name);
+  r->data.symbol.s = NULL; /* Force binding at first use */
+  return r; 
+}
+/*}}}*/
+void define_result(Evaluator *x, char *string, Expr *e, int early)/*{{{*/
+/*++++++++++++++++++++
+  Add a result defn.  If the expr is null, it means build a single expr corr.
+  to the value of the symbol with the same name as the result string.
+  ++++++++++++++++++++*/
+{
+  int i;
+  Result *r;
+
+  grow_results(x);
+  r = &(x->results[x->n_results++]);
+  r->result = new_string(string);
+  r->early = early;
+  if (e) {
+    r->e = e;
+  } else {
+    Expr *ne;
+    ne = new_sym_expr(string);
+    r->e = ne;
+  }
+
+  return;
+}
+/*}}}*/
+void define_symbol(Evaluator *x, char *name, Expr *e)/*{{{*/
+/*++++++++++++++++++++
+  Define an entry in the symbol table.
+  ++++++++++++++++++++*/
+{
+  Symbol *s;
+  s = find_symbol_or_create(x, name);
+  s->data.e = e;
+  s->is_expr = 1;
+  return;
+}
+/*}}}*/
+  
+void define_symresult(Evaluator *x, char *name, Expr *e, int early)/*{{{*/
+/*++++++++++++++++++++
+  Define an entry in the symbol table, and a result with the same name.
+  ++++++++++++++++++++*/
+{
+  define_symbol(x, name, e);
+  define_result(x, name, e, early);
+  return;
+}
+/*}}}*/
+void clear_symbol_values(Evaluator *x)/*{{{*/
+{
+  SymbolList *sl;
+  for (sl=x->symbols; sl; sl=sl->next) {
+    Symbol *s = sl->sym;
+    if (0 == s->is_expr) {
+      s->data.val = 0;
+    }
+  }
+  x->any_results_so_far = 0;
+}
+/*}}}*/
+void set_symbol_value(Evaluator *x, char *sym_name)/*{{{*/
+/* Set the plain symbol sym_name in x to 1, creating it if needed.  Exits with
+   status 2 if the symbol is defined by an expression rather than a value. */
+{
+  Symbol *s = find_symbol_or_create(x, sym_name);
+  if (s->is_expr) {
+    fprintf(stderr, "Cannot set value for symbol '%s', it is defined by an expression\n", sym_name);
+    exit(2);
+  } else {
+    s->data.val = 1;
+  }
+}
+/*}}}*/
+static int eval(Evaluator *x, Expr *e)/*{{{*/
+/*++++++++++++++++++++
+  Evaluate the value of an expr
+  ++++++++++++++++++++*/
+{
+  switch (e->type) {
+    case E_AND:
+      return eval(x, e->data.and.c1) && eval(x, e->data.and.c2);
+    case E_OR:
+      return eval(x, e->data.or.c1) || eval(x, e->data.or.c2);
+    case E_XOR:
+      return eval(x, e->data.xor.c1) ^ eval(x, e->data.xor.c2);
+    case E_COND:
+      return eval(x, e->data.cond.c1) ? eval(x, e->data.cond.c2) : eval(x, e->data.cond.c3);
+    case E_NOT:
+      return !eval(x, e->data.not.c1);
+    case E_WILD:
+      return x->any_results_so_far;
+    case E_SYMBOL:
+      {
+        Symbol *s = e->data.symbol.s;
+        if (!s) {
+          /* Not bound yet */
+          e->data.symbol.s = s = find_symbol_or_create(x, e->data.symbol.name);
+        }
+        if (s->is_expr) {
+          return eval(x, s->data.e);
+        } else {
+          return s->data.val;
+        }
+      }
+    default:
+      fprintf(stderr, "Interal error : Can't get here!\n");
+      exit(2);
+  }
+}
+/*}}}*/
+int evaluate_result(Evaluator *x, char **result, int *result_early)/*{{{*/
+/*++++++++++++++++++++
+  Evaluate the result which holds given the symbols that are set.  Returns 0 if more than one result holds, otherwise 1.
+  ++++++++++++++++++++*/
+{
+  int i;
+  int matched = -1; /* Index of the result whose expr held, -1 while none has */
+  for (i=0; i<x->n_results; i++) {
+    if (eval(x, x->results[i].e)) {
+      if (x->any_results_so_far) { /* Flag already set : treated as a second match */
+        *result = NULL;
+        return 0; /* *result_early is not written on this path */
+      } else {
+        x->any_results_so_far = 1; /* Also what E_WILD exprs evaluate to; reset by clear_symbol_values() */
+        matched = i;
+      }
+    }
+  }
+  /* result_early may be NULL if the caller does not want the 'early' flag */
+  if (matched < 0) {
+    *result = NULL;
+    if (result_early) *result_early = 0;
+    return 1;
+  } else {
+    *result = x->results[matched].result;
+    if (result_early) *result_early = x->results[matched].early;
+    return 1;
+  }
+}
+/*}}}*/
+/* Initialisation */
+void eval_initialise(void)/*{{{*/
+{
+  exit_evaluator = create_evaluator("result");
+  attr_evaluator = create_evaluator("attribute");
+}
+/*}}}*/
diff -urN jbofihe-0.36/dfasyn/n2d.c jbofihe-0.37/dfasyn/n2d.c
--- jbofihe-0.36/dfasyn/n2d.c	Thu Jan  1 01:00:00 1970
+++ jbofihe-0.37/dfasyn/n2d.c	Wed Aug  8 22:41:52 2001
@@ -0,0 +1,1551 @@
+/***************************************
+  $Header: /cvs/src/jbofihe/dfasyn/n2d.c,v 1.1 2001/07/12 21:15:35 richard Exp $
+
+  Main program for NFA to DFA table builder program.
+  ***************************************/
+
+/* Copyright (C) Richard P. Curnow  2000-2001 */
+/*
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * 
+*/
+
+/* {{{ General comments
+  Convert a nondeterministic finite automaton (NFA) into a deterministic finite
+  automaton (DFA).
+
+  The NFA is defined in terms of a set of states, with transitions between the
+  states.  The transitions may occur on any one of a set of symbols (specified
+  with | characters between the options), or may be 'epsilon' transitions, i.e.
+  occurring without consumption of any input.  A state may have multiple
+  transitions for the same input symbol (hence 'nondeterministic').  The final
+  state encountered within the final block defined in the input file is taken
+  to be the start state of the whole NFA.  A state may be entered more than
+  once in the file; the transitions in the multiple definitions are combined to
+  give the complete transition set.  A state may have an exit value assigned
+  (with =); this is the return value of the automaton if the end of string is
+  encountered when in that state.  (If the resulting DFA can be in multiple
+  exiting NFA states when the end of string is reached, the result is all the
+  associated NFA exit values or'd together, so it is best to use distinct bits
+  for NFA exit values unless it is known that it is safe not to in a particular
+  case.) The input grammar allows a BLOCK <name> ... ENDBLOCK construction +
+  block instantiation.  This allows common parts of the NFA state machine to be
+  reused in multiple places as well as aiding structuring and readability.  See
+  morf_nfa.in for an example of the input grammar, and morf.c for a
+  (non-trivial) example of how to build the automaton around the tables that
+  this script generates.
+  }}} */
+
+#include <ctype.h>
+#include "n2d.h"
+
+/* Globally visible options to control reporting */
+FILE *report; /* optional report stream; print_nfa()/print_dfa() return at once when it is NULL */
+FILE *input; /* NOTE(review): not referenced in this part of the file - presumably the grammar source */
+FILE *output; /* stream the generated C tables and functions are written to */
+extern FILE *yyin;
+extern FILE *yyout;
+int verbose; /* non-zero => progress messages on stderr (see add_dfa) */
+
+static Block **blocks = NULL; /* every block created so far, in creation order */
+static int nblocks = 0; /* entries of blocks[] in use */
+static int maxblocks = 0; /* allocated capacity of blocks[] */
+
+static char **toktable=NULL; /* token names; a token's number is its index in this table */
+static int ntokens = 0; /* entries of toktable[] in use */
+static int maxtokens = 0; /* allocated capacity of toktable[] */
+
+struct Abbrev {
+  char *lhs; /* Defined name */
+  char **rhs; /* Token/define */
+  int nrhs; /* entries of rhs[] in use */
+  int maxrhs; /* allocated capacity of rhs[] */
+};
+
+static struct Abbrev *abbrevtable=NULL; /* searched newest-first by lookup_abbrev() */
+static int nabbrevs = 0; /* entries of abbrevtable[] in use */
+static int maxabbrevs = 0; /* allocated capacity of abbrevtable[] */
+
+/* ================================================================= */
+static void grow_tokens(void)/*{{{*/
+{ /* Make room for 32 more entries in toktable[]. */
+  maxtokens = maxtokens + 32;
+  toktable = resize_array(char *, toktable, maxtokens);
+}
+/*}}}*/
+static int create_token(char *name)/*{{{*/
+/* Store new_string(name) at the end of the token table, growing the table
+   first when it is full.  Returns the index (= token number) assigned. */
+{
+  const int slot = ntokens;
+  if (slot == maxtokens) grow_tokens();
+  toktable[slot] = new_string(name);
+  ntokens = slot + 1;
+  return slot;
+}
+/*}}}*/
+int lookup_token(char *name, int create)/*{{{*/
+/* Map a token name onto its index in toktable[].  'create' sets the policy:
+     USE_OLD_MUST_EXIST    - exit(1) unless the token has been declared
+     CREATE_MUST_NOT_EXIST - exit(1) if the token exists, else create it
+     CREATE_OR_USE_OLD     - create the token unless it exists
+   With any other value the search result (-1 if absent) is returned. */
+{
+  int idx = -1;
+  int n;
+  for (n=0; n<ntokens; n++) {
+    if (strcmp(toktable[n], name) == 0) {
+      idx = n;
+      break;
+    }
+  }
+
+  if (create == USE_OLD_MUST_EXIST) {
+    if (idx < 0) {
+      fprintf(stderr, "Token '%s' was never declared\n", name);
+      exit(1);
+    }
+  } else if (create == CREATE_MUST_NOT_EXIST) {
+    if (idx >= 0) {
+      fprintf(stderr, "Token '%s' already declared\n", name);
+      exit(1);
+    }
+    idx = create_token(name);
+  } else if (create == CREATE_OR_USE_OLD) {
+    if (idx < 0) {
+      idx = create_token(name);
+    }
+  }
+
+  return idx;
+}
+/*}}}*/
+/* ================================================================= */
+static void grow_abbrevs(void)/*{{{*/
+{ /* Make room for 32 more entries in abbrevtable[]. */
+  maxabbrevs = maxabbrevs + 32;
+  abbrevtable = resize_array(struct Abbrev, abbrevtable, maxabbrevs);
+}
+/*}}}*/
+struct Abbrev * create_abbrev(char *name)/*{{{*/
+/* Append an abbreviation called 'name', with an empty right hand side, to
+   abbrevtable[] (extended first if it is full) and return a pointer to it. */
+{
+  struct Abbrev *fresh;
+  if (nabbrevs == maxabbrevs) grow_abbrevs();
+  fresh = &abbrevtable[nabbrevs++];
+  fresh->lhs = new_string(name);
+  fresh->rhs = NULL;
+  fresh->nrhs = fresh->maxrhs = 0;
+  return fresh;
+}
+/*}}}*/
+void add_tok_to_abbrev(struct Abbrev *abbrev, char *tok)/*{{{*/
+{ /* Append new_string(tok) to the abbreviation's expansion list. */
+  const int at = abbrev->nrhs;
+  if (at == abbrev->maxrhs) { /* list is full: extend it by 8 slots */
+    abbrev->maxrhs = at + 8;
+    abbrev->rhs = resize_array(char *, abbrev->rhs, abbrev->maxrhs);
+  }
+  abbrev->rhs[abbrev->nrhs++] = new_string(tok);
+}
+/*}}}*/
+static struct Abbrev * lookup_abbrev(char *name, int create)/*{{{*/
+/* Find the abbreviation called 'name'.  'create' sets the policy:
+     CREATE_MUST_NOT_EXIST - exit(1) if the name exists, otherwise add it
+     CREATE_OR_USE_OLD     - add the name if it does not exist yet
+     any other value       - plain lookup (add_transition() relies on
+                             getting NULL for a name that is not known) */
+{
+  struct Abbrev *hit = NULL;
+  int n = nabbrevs;
+
+  /* Walk from the newest entry backwards so that, should a name have been
+     defined more than once, the latest definition is the one returned. */
+  while (--n >= 0) {
+    if (strcmp(abbrevtable[n].lhs, name) == 0) {
+      hit = &abbrevtable[n];
+      break;
+    }
+  }
+
+  if (create == CREATE_MUST_NOT_EXIST) {
+    if (hit) {
+      fprintf(stderr, "Abbreviation '%s' already declared\n", name);
+      exit(1);
+    }
+    hit = create_abbrev(name);
+  } else if (create == CREATE_OR_USE_OLD) {
+    if (!hit) {
+      hit = create_abbrev(name);
+    }
+  }
+
+  return hit;
+}
+/*}}}*/
+/* ================================================================= */
+
+struct Attribute {
+  char *name; /* set from new_string() in create_attribute() */
+};
+
+static struct Attribute *attributes = NULL; /* attribute table, extended 16 entries at a time */
+static int n_attributes = 0; /* entries of attributes[] in use */
+static int max_attributes = 0; /* allocated capacity of attributes[] */
+
+static void grow_attributes(void)/*{{{*/
+{ /* Make room for 16 more entries in attributes[]. */
+  max_attributes = max_attributes + 16;
+  attributes = resize_array(struct Attribute, attributes, max_attributes);
+}
+/*}}}*/
+static int create_attribute(char *name)/*{{{*/
+/* Add an attribute called 'name' at the end of attributes[], extending the
+   array first when it is full.  Returns the new attribute's index. */
+{
+  struct Attribute *slot;
+
+  if (n_attributes == max_attributes) grow_attributes();
+
+  slot = &attributes[n_attributes];
+  slot->name = new_string(name);
+  return n_attributes++;
+}
+/*}}}*/
+static int lookup_attribute(char *name)/*{{{*/
+  /* Return the index of the attribute called 'name', adding it to the table
+     when no entry of that name exists yet. */
+{
+  int idx = 0;
+  while (idx < n_attributes) {
+    if (strcmp(name, attributes[idx].name) == 0) return idx;
+    idx++;
+  }
+  return create_attribute(name);
+}
+/*}}}*/
+/* ================================================================= */
+static void grow_blocks(void)/*{{{*/
+{ /* Make room for 32 more entries in blocks[]. */
+  maxblocks = maxblocks + 32;
+  blocks = resize_array(Block*, blocks, maxblocks);
+}
+/*}}}*/
+static Block * create_block(char *name)/*{{{*/
+/* Allocate an empty block called 'name', append it to blocks[] and return it.
+   All hash buckets and the state array start out empty; subcount starts at 1
+   (it numbers the internal "#<n>" states of add_transitions_to_internal). */
+{
+  Block *blk;
+  Stateset *bucket;
+  int n;
+
+  if (nblocks == maxblocks) grow_blocks();
+
+  blk = new(Block);
+  blocks[nblocks++] = blk;
+  blk->name = new_string(name);
+  blk->subcount = 1;
+
+  blk->states = NULL;
+  blk->nstates = 0;
+  blk->maxstates = 0;
+
+  for (n=0; n<HASH_BUCKETS; n++) {
+    bucket = &blk->state_hash[n];
+    bucket->states = NULL;
+    bucket->nstates = 0;
+    bucket->maxstates = 0;
+  }
+
+  return blk;
+}
+/*}}}*/
+Block * lookup_block(char *name, int create)/*{{{*/
+/* Return the block called 'name'.  'create' selects what happens next:
+     USE_OLD_MUST_EXIST    - exit(1) unless the block exists
+     CREATE_MUST_NOT_EXIST - exit(1) if the block exists, else create it
+     CREATE_OR_USE_OLD     - create the block unless it exists
+   With any other value the search result (possibly NULL) is returned. */
+{
+  Block *blk = NULL;
+  int n;
+  for (n=0; n<nblocks; n++) {
+    if (strcmp(blocks[n]->name, name) == 0) {
+      blk = blocks[n];
+      break;
+    }
+  }
+
+  if (create == USE_OLD_MUST_EXIST) {
+    if (!blk) {
+      fprintf(stderr, "Could not find block '%s' to instantiate\n", name);
+      exit(1);
+    }
+  } else if (create == CREATE_MUST_NOT_EXIST) {
+    if (blk) {
+      fprintf(stderr, "Already have a block called '%s', cannot redefine\n", name);
+      exit(1);
+    }
+    blk = create_block(name);
+  } else if (create == CREATE_OR_USE_OLD) {
+    if (!blk) {
+      blk = create_block(name);
+    }
+  }
+
+  return blk;
+}
+/*}}}*/
+/* ================================================================= */
+static void maybe_grow_states(Block *b, int hash)/*{{{*/
+/* Ensure bucket 'hash' (+8 slots) and the block's state array (+32) can each take one more state. */
+{
+  Stateset *bucket = &b->state_hash[hash];
+  if (bucket->nstates == bucket->maxstates) {
+    bucket->maxstates = bucket->maxstates + 8;
+    bucket->states = resize_array(State*, bucket->states, bucket->maxstates);
+  }
+  if (b->nstates == b->maxstates) {
+    b->maxstates = b->maxstates + 32;
+    b->states = resize_array(State*, b->states, b->maxstates);
+  }
+}
+/*}}}*/
+/* ================================================================= */
+static unsigned long hashfn(const char *s)/*{{{*/
+/* Hash the NUL-terminated string 's' to a hash bucket number.
+   Every byte k is mixed in as y = ((y<<13) ^ (~y>>6)) + k; afterwards the
+   high part is folded in once more and the value is masked with HASH_MASK.
+   Bytes are read as unsigned char, so bytes >= 0x80 do not go negative. */
+{
+  unsigned long y = 0UL;
+  const unsigned char *t = (const unsigned char *) s;
+
+  for (; *t; t++) {
+    y = (y<<13) ^ ((~y)>>6);
+    y += (unsigned long) *t;
+  }
+
+  y ^= (y>>13);
+  y &= HASH_MASK;
+  return y;
+}
+/*}}}*/
+static State * create_state(Block *b, char *name)/*{{{*/
+/* Make an empty state 'name' in block b: entered in the flat array (position = index) and in its hash bucket. */
+{
+  const int bucket_no = hashfn(name);
+  Stateset *bucket;
+  State *st;
+  maybe_grow_states(b, bucket_no);
+  bucket = &b->state_hash[bucket_no];
+  st = new(State);
+  bucket->states[bucket->nstates++] = st;
+  st->index = b->nstates;
+  b->states[b->nstates++] = st;
+  st->parent = b;
+  st->name = new_string(name);
+  st->transitions = NULL;
+  st->ordered_trans = NULL;
+  st->exitvals = st->attributes = NULL;
+  st->n_transitions = st->removed = 0;
+  return st;
+}
+/*}}}*/
+State * lookup_state(Block *b, char *name, int create)/*{{{*/
+/* Return the state called 'name' in block 'b', searching only the hash bucket
+   the name maps to.  'create' selects what happens next:
+     USE_OLD_MUST_EXIST    - exit(1) unless the state exists
+     CREATE_MUST_NOT_EXIST - create the state; if it exists already, print a
+                             warning and return the existing one
+     CREATE_OR_USE_OLD     - create the state unless it exists
+   With any other value the search result (possibly NULL) is returned. */
+{
+  Stateset *bucket = &b->state_hash[hashfn(name)];
+  State *st = NULL;
+  int n;
+
+  /* A state can only be in the bucket its name hashes to */
+  for (n=0; n<bucket->nstates; n++) {
+    if (strcmp(bucket->states[n]->name, name) == 0) {
+      st = bucket->states[n];
+      break;
+    }
+  }
+
+  if (create == USE_OLD_MUST_EXIST) {
+    if (!st) {
+      fprintf(stderr, "Could not find a state '%s' in block '%s' to transition to\n", name, b->name);
+      exit(1);
+    }
+  } else if (create == CREATE_MUST_NOT_EXIST) {
+    if (st) {
+      fprintf(stderr, "Warning : already have a state '%s' in block '%s'\n", name, b->name);
+    } else {
+      st = create_state(b, name);
+    }
+  } else if (create == CREATE_OR_USE_OLD) {
+    if (!st) {
+      st = create_state(b, name);
+    }
+  }
+
+  return st;
+}
+/*}}}*/
+/* ================================================================= */
+Stringlist * add_token(Stringlist *existing, char *token)/*{{{*/
+/* Push 'token' onto the front of list 'existing' and return the new head.
+   A NULL token is stored as a NULL string; otherwise new_string(token) is. */
+{
+  Stringlist *head = new(Stringlist);
+
+  head->next = existing;
+  head->string = token ? new_string(token) : NULL;
+
+  return head;
+}
+/*}}}*/
+static void add_transition(State *curstate, char *str, char *destination)/*{{{*/
+/* Add a single transition to the state.  Allow definitions to be recursive:
+   an abbreviation adds one transition per entry of its expansion.  A NULL
+   'str' is an epsilon transition (token -1); other names must be tokens. */
+{
+  struct Abbrev *abbrev;
+  abbrev = (str) ? lookup_abbrev(str, USE_OLD_MUST_EXIST) : NULL;
+  if (abbrev) {
+    int i;
+    for (i=0; i<abbrev->nrhs; i++) {
+      add_transition(curstate, abbrev->rhs[i], destination);
+    }
+  } else {
+    Translist *tl = new(Translist);
+    tl->next = curstate->transitions;
+    /* No problem with aliasing: these strings are read-only and live until end of program */
+    tl->token = (str) ? lookup_token(str, USE_OLD_MUST_EXIST) : -1;
+    tl->ds_name = destination;
+    tl->ds_ref = NULL; /* resolved later by fixup_state_refs() */
+    curstate->transitions = tl;
+  }
+}
+/*}}}*/
+void add_transitions(State *curstate, Stringlist *tokens, char *destination)/*{{{*/
+/* For each entry of 'tokens' (token, abbreviation or NULL = epsilon) add a transition curstate -> destination. */
+{
+  Stringlist *sl;
+  for (sl=tokens; sl; sl=sl->next) {
+    add_transition(curstate, sl->string, destination);
+  }
+}
+/*}}}*/
+State * add_transitions_to_internal(Block *curblock, State *addtostate, Stringlist *tokens)/*{{{*/
+{ /* Create the block's next internal state "#<n>" and route 'tokens' from addtostate to it. */
+  char label[1024];
+  State *internal;
+  sprintf(label, "#%d", curblock->subcount++);
+  internal = lookup_state(curblock, label, CREATE_MUST_NOT_EXIST);
+  add_transitions(addtostate, tokens, internal->name);
+  return internal;
+}
+/*}}}*/
+void add_exit_value(State *curstate, char *value)/*{{{*/
+{ /* Push 'value' (the pointer is stored, not a copy) onto the state's exit value list. */
+  Stringlist *cell = new(Stringlist);
+
+  cell->next = curstate->exitvals;
+  cell->string = value;
+  curstate->exitvals = cell;
+}
+/*}}}*/
+void set_state_attribute(State *curstate, char *name)/*{{{*/
+{ /* Push 'name' (the pointer is stored, not a copy) onto the state's attribute list. */
+  Stringlist *cell = new(Stringlist);
+
+  cell->next = curstate->attributes;
+  cell->string = name;
+  curstate->attributes = cell;
+}
+/*}}}*/
+/* ================================================================= */
+static void join_instance_name(char *buf, size_t size, const char *inst, const char *local)/*{{{*/
+/* Build "<inst>.<local>" in buf; exit(1) instead of overrunning the buffer. */
+{
+  if (strlen(inst) + 1 + strlen(local) >= size) {
+    fprintf(stderr, "Name '%s.%s' is too long\n", inst, local);
+    exit(1);
+  }
+  strcpy(buf, inst);
+  strcat(buf, ".");
+  strcat(buf, local);
+}
+/*}}}*/
+static Stringlist * copy_strings(Stringlist *src)/*{{{*/
+/* Copy a list cell by cell (strings are shared); the copy is in reverse order. */
+{
+  Stringlist *out = NULL;
+  for (; src; src = src->next) {
+    Stringlist *cell = new(Stringlist);
+    cell->string = src->string;
+    cell->next = out;
+    out = cell;
+  }
+  return out;
+}
+/*}}}*/
+void instantiate_block(Block *curblock, char *block_name, char *instance_name)/*{{{*/
+/* Copy every state of block 'block_name' into curblock as "<instance_name>.<state>", with transition
+   destinations renamed likewise.  Exit values/attributes of a state that already exists are replaced. */
+{
+  Block *master = lookup_block(block_name, USE_OLD_MUST_EXIST);
+  char namebuf[1024];
+  int i;
+  for (i=0; i<master->nstates; i++) {
+    State *s = master->states[i];
+    State *new_state;
+    Translist *tl;
+    join_instance_name(namebuf, sizeof(namebuf), instance_name, s->name);
+    /* In perverse circumstances, we might already have a state called this */
+    new_state = lookup_state(curblock, namebuf, CREATE_OR_USE_OLD);
+
+    for (tl=s->transitions; tl; tl=tl->next) {
+      Translist *new_tl = new(Translist);
+      new_tl->token = tl->token;
+      join_instance_name(namebuf, sizeof(namebuf), instance_name, tl->ds_name);
+      new_tl->ds_name = new_string(namebuf);
+      new_tl->ds_ref = NULL;
+      new_tl->next = new_state->transitions;
+      new_state->transitions = new_tl;
+    }
+    new_state->exitvals = copy_strings(s->exitvals);
+    new_state->attributes = copy_strings(s->attributes);
+  }
+}
+/*}}}*/
+void fixup_state_refs(Block *b)/*{{{*/
+/* Resolve the destination name of every transition in the block to a State
+   pointer.  A destination that was never defined is created on the spot. */
+{
+  int n;
+  Translist *t;
+  for (n=0; n<b->nstates; n++) {
+    for (t=b->states[n]->transitions; t; t=t->next)
+      t->ds_ref = lookup_state(b, t->ds_name, CREATE_OR_USE_OLD);
+  }
+}
+/*}}}*/
+/* ================================================================= */
+
+/* Bitmap to contain epsilon closure for NFA */
+
+static unsigned long **eclo;
+
+/* ================================================================= */
+static inline const int round_up(const int x) {/*{{{*/
+  return (31 + x) >> 5; /* number of 32-bit words needed to hold x bits */
+}
+/*}}}*/
+static inline void set_bit(unsigned long *x, int n)/*{{{*/
+{ /* Turn on bit n of the bitmap x, which holds 32 bits in each word. */
+  const int word = n >> 5;
+  const int offset = n & 31;
+  x[word] |= (1UL << offset);
+}
+/*}}}*/
+static inline int is_set(unsigned long *x, int n)/*{{{*/
+{ /* Return 1 if bit n of the bitmap x (32 bits per word) is on, else 0. */
+  const unsigned long word = x[n >> 5];
+  const unsigned long bit = (word >> (n & 31)) & 1UL;
+  return bit ? 1 : 0;
+}
+/*}}}*/
+/* ================================================================= */
+/* During the algorithm to transitively close the epsilon closure table,
+   maintain a stack of indices that have to be rescanned.  This avoids the slow
+   approach of repeatedly rescanning the whole table until no changes are
+   found. */
+
+typedef struct IntPair {
+  struct IntPair *next; /* next cell on whichever list (stack or freelist) holds this one */
+  int i; /* row of eclo[][] that is to be extended ... */
+  int j; /* ... with everything already in row j (see generate_epsilon_closure) */
+} IntPair;
+
+static IntPair *freelist=NULL; /* spare cells available to push_pair() */
+static IntPair *stack=NULL; /* pending (i,j) pairs, most recently pushed first */
+
+/* ================================================================= */
+static void push_pair(int i, int j)/*{{{*/
+/* Push (i,j) on the work stack.  Cells come from a free list which, when it
+   runs dry, is restocked with a freshly allocated batch of 32 cells. */
+{
+  static const int grow_by = 32;
+  IntPair *cell;
+
+  if (freelist == NULL) {
+    IntPair *batch = new_array(IntPair, grow_by);
+    int n;
+    batch[0].next = NULL;
+    for (n=1; n<grow_by; n++) batch[n].next = batch + (n-1);
+    freelist = batch + (grow_by-1);
+  }
+  cell = freelist;
+  freelist = cell->next;
+  cell->i = i;
+  cell->j = j;
+  cell->next = stack;
+  stack = cell;
+}
+/*}}}*/
+static int pop_pair(int *i, int *j) {/*{{{*/
+  /* Pop the top work item into *i, *j and recycle its cell onto the free
+     list.  Returns 1 on success, 0 (outputs untouched) if the stack is empty. */
+  IntPair *top = stack;
+
+  if (top == NULL) return 0;
+
+  stack = top->next;
+  *i = top->i;
+  *j = top->j;
+  top->next = freelist;
+  freelist = top;
+  return 1;
+}
+/*}}}*/
+static void generate_epsilon_closure(Block *b)/*{{{*/
+/* Fill eclo[][] : bit t of row f is set when NFA state t can be reached from
+   state f through epsilon transitions alone (f itself included). */
+{
+  const int N = b->nstates;
+  const int words = round_up(N);
+  int from, to, w;
+
+  /* Start with an all-zero N x N bitmap */
+  eclo = new_array(unsigned long*, N);
+  for (from=0; from<N; from++) {
+    eclo[from] = new_array(unsigned long, words);
+    for (w=0; w<words; w++) eclo[from][w] = 0;
+  }
+
+  /* Seed it with the direct epsilon transitions, queueing each of them */
+  for (from=0; from<N; from++) {
+    State *s = b->states[from];
+    Translist *tl = s->transitions;
+    const int self = s->index;
+    set_bit(eclo[self], self); /* Always reflexive */
+    for (; tl; tl=tl->next) {
+      if (tl->token >= 0) continue; /* only epsilon transitions count */
+      to = tl->ds_ref->index;
+      set_bit(eclo[self], to);
+      push_pair(self, to);
+    }
+  }
+
+  /* Work through the queue until the table is transitively closed : for a
+     pair (from,to), everything reachable from 'to' is reachable from 'from' */
+  while (pop_pair(&from, &to)) {
+    int k;
+    for (k=0; k<N; k++) {
+      if (is_set(eclo[from], k) || !is_set(eclo[to], k)) continue;
+      set_bit(eclo[from], k);
+      push_pair(from, k);
+    }
+  }
+}
+/*}}}*/
+static void print_nfa(Block *b)/*{{{*/
+/* Write a description of every NFA state of the block to the report file:
+   its transitions, exit values and attributes ('|' separated) and epsilon
+   closure.  Does nothing when no report file is open. */
+{
+  int i, j, N;
+  N = b->nstates;
+  
+  if (!report) return;
+
+  for (i=0; i<N; i++) {
+    State *s = b->states[i];
+    Translist *tl;
+    Stringlist *sl;
+    fprintf(report, "NFA state %d = %s\n", i, s->name);
+    for (tl=s->transitions; tl; tl=tl->next) {
+      fprintf(report, "  [%s] -> %s\n",
+              (tl->token >= 0) ? toktable[tl->token] : "(epsilon)",
+              tl->ds_name);
+    }
+    if (s->exitvals) {
+      int first = 1;
+      fprintf(report, "  Exit value : ");
+      for (sl=s->exitvals; sl; sl=sl->next) {
+        fprintf(report, "%s%s", first ? "" : "|", sl->string);
+        first = 0; /* every later value is preceded by the separator */
+      }
+      fprintf(report, "\n");
+    }
+    if (s->attributes) {
+      int first = 1;
+      fprintf(report, "  Attributes : ");
+      for (sl=s->attributes; sl; sl=sl->next) {
+        fprintf(report, "%s%s", first ? "" : "|", sl->string);
+        first = 0; /* every later attribute is preceded by the separator */
+      }
+      fprintf(report, "\n");
+    }
+    fprintf(report, "  Epsilon closure :\n    (self)\n");
+    for (j=0; j<N; j++) {
+      if (i!=j && is_set(eclo[i], j)) {
+        fprintf(report, "    %s\n", b->states[j]->name);
+      }
+    }
+    
+    fprintf(report, "\n");
+  }
+}
+/*}}}*/
+/* ================================================================= */
+
+/* Indexed [from_state][token][to_state], flag set if there is
+   a transition from from_state to to_state, via token then zero or more
+   epsilon transitions */
+
+static unsigned long ***transmap;
+
+/* Index [from_nfa_state][token], flag set if there is a transition
+   to any destination nfa state for that token. */
+static unsigned long **anytrans;
+
+/* ================================================================= */
+static void build_transmap(Block *b)/*{{{*/
+/* Allocate and fill transmap[][][] and anytrans[][] for the block.  eclo[][]
+   must exist already: each destination contributes its epsilon closure. */
+{
+  const int N = b->nstates;
+  const int Nt = ntokens;
+  const int state_words = round_up(N);
+  const int token_words = round_up(Nt);
+  int s, t, w;
+
+  /* Allocate everything, cleared to zero */
+  transmap = new_array(unsigned long **, N);
+  anytrans = new_array(unsigned long *, N);
+  for (s=0; s<N; s++) {
+    transmap[s] = new_array(unsigned long *, Nt);
+    anytrans[s] = new_array(unsigned long, token_words);
+    for (w=0; w<token_words; w++) anytrans[s][w] = 0UL;
+    for (t=0; t<Nt; t++) {
+      unsigned long *row = new_array(unsigned long, state_words);
+      for (w=0; w<state_words; w++) row[w] = 0UL;
+      transmap[s][t] = row;
+    }
+  }
+
+  /* Merge in the closure of the destination of each non-epsilon transition */
+  for (s=0; s<N; s++) {
+    Translist *tl;
+    for (tl=b->states[s]->transitions; tl; tl=tl->next) {
+      unsigned long *closure, *row;
+      if (tl->token < 0) continue;
+      closure = eclo[tl->ds_ref->index];
+      row = transmap[s][tl->token];
+      for (w=0; w<state_words; w++) {
+        row[w] |= closure[w];
+        if (closure[w]) set_bit(anytrans[s], tl->token);
+      }
+    }
+  }
+}
+/*}}}*/
+/* ================================================================= */
+
+static DFANode **dfas; /* all DFA states created so far; add_dfa() sets dfas[n]->index = n */
+static int ndfa=0; /* entries of dfas[] in use */
+static int maxdfa=0; /* allocated capacity of dfas[] */
+
+static int had_ambiguous_result = 0; /* set by add_dfa() after an ambiguous result or attribute */
+
+/* ================================================================= */
+
+/* Implement an array of linked lists to access DFA states directly.  The
+ * hashes are given by folding the signatures down to single bytes. */
+
+struct DFAList {
+  struct DFAList *next; /* next entry in the same hash chain */
+  DFANode *dfa; /* the DFA state this entry refers to */
+};
+
+#define DFA_HASHSIZE 256
+static struct DFAList *dfa_hashtable[DFA_HASHSIZE]; /* indexed by fold_signature() of a state's signature */
+
+/* ================================================================= */
+static void grow_dfa(void)/*{{{*/
+{ /* Make room for 32 more entries in dfas[]. */
+  maxdfa = maxdfa + 32;
+  dfas = resize_array(DFANode*, dfas, maxdfa);
+}
+/*}}}*/
+static unsigned long fold_signature(unsigned long sig)/*{{{*/
+{ /* XOR-fold a signature down to one byte (0..255) to index dfa_hashtable[]. */
+  unsigned long h = sig;
+
+  h ^= sig >> 16;
+  h ^= h >> 8;
+  return h & 0xff;
+}
+/*}}}*/
+/* ================================================================= */
+static int find_dfa(unsigned long *nfas, int N)/*{{{*/
+/* Look for an existing DFA state made up of exactly the NFA states in the
+   bitmap 'nfas' (N = number of NFA states).  Returns its index, or -1 if
+   there is none.
+
+   Only the hash chain selected by the folded signature is searched.  Within
+   it the full 'signatures' are compared first to get rapid rejection of any
+   DFA state that can't possibly match. */
+{
+  int j;
+  int words = round_up(N);
+  unsigned long signature = 0UL;
+  struct DFAList *dfal;
+
+  /* Same signature as add_dfa() stores : XOR of all the bitmap words */
+  for (j=0; j<words; j++) {
+    signature ^= nfas[j];
+  }
+
+  for (dfal=dfa_hashtable[fold_signature(signature)]; dfal; dfal = dfal->next) {
+    DFANode *dfa = dfal->dfa;
+
+    if (signature != dfa->signature) continue;
+
+    /* Signatures agree, so compare the bitmaps word by word */
+    for (j=0; j<words; j++) {
+      if (nfas[j] != dfa->nfas[j]) break;
+    }
+    if (j == words) {
+      return dfa->index;
+    }
+  }
+
+  return -1;
+}
+/*}}}*/
+
+static int add_dfa(Block *b, unsigned long *nfas, int N, int Nt, int from_state, int via_token)/*{{{*/
+{ /* Create DFA state number ndfa for the NFA state set 'nfas' (N NFA states, Nt tokens); returns its index. */
+  int j;
+  int result = ndfa; /* index the new state will get; this is the return value */
+  int had_exitvals; /* assigned below but never read in this function */
+  int this_result_unambiguous; /* return value of evaluate_result() : 0 means ambiguous */
+ 
+  Stringlist *ex; /* list of the NFA tags (exit values, later attributes) collected for this state */
+  unsigned long signature = 0UL, folded_signature;
+  struct DFAList *dfal;
+
+  if (verbose) {
+    fprintf(stderr, "Adding DFA state %d\r", ndfa);
+    fflush(stderr);
+  }
+
+  if (maxdfa == ndfa) {
+    grow_dfa();
+  }
+
+  dfas[ndfa] = new(DFANode);
+  dfas[ndfa]->nfas = new_array(unsigned long, round_up(N)); /* private copy of the bitmap, filled below */
+  dfas[ndfa]->map = new_array(int, Nt); /* next DFA state per token */
+  for (j=0; j<Nt; j++) dfas[ndfa]->map[j] = -1; /* -1 until build_dfa() fills the entry in */
+  dfas[ndfa]->index = ndfa;
+  dfas[ndfa]->defstate = -1; /* print_dfa() only reports a basis state when this is >= 0 */
+  
+  dfas[ndfa]->from_state = from_state; /* predecessor state and token : print_dfa() uses */
+  dfas[ndfa]->via_token = via_token;   /* them to print the reverse route */
+  
+  for (j=0; j<round_up(N); j++) {
+    unsigned long x = nfas[j];
+    signature ^= x; /* signature = XOR of all bitmap words, as find_dfa() recomputes it */
+    dfas[ndfa]->nfas[j] = x;
+  }
+  dfas[ndfa]->signature = signature;
+  
+  folded_signature = fold_signature(signature);
+  dfal = new(struct DFAList); /* link the state in at the head of its hash chain */
+  dfal->dfa = dfas[ndfa];
+  dfal->next = dfa_hashtable[folded_signature];
+  dfa_hashtable[folded_signature] = dfal;
+
+  /* {{{ Boolean reduction for result */
+  ex = NULL;
+  had_exitvals = 0;
+  clear_symbol_values(exit_evaluator); /* NOTE(review): presumably resets every symbol before they are set below - confirm */
+  for (j=0; j<N; j++) {
+    if (is_set(dfas[ndfa]->nfas, j)) { /* NFA state j is a member of this DFA state */
+      Stringlist *sl;
+      State *s = b->states[j];
+      for (sl = s->exitvals; sl; sl = sl->next) {
+        Stringlist *new_sl;
+        new_sl = new(Stringlist);
+        new_sl->string = sl->string; /* the string itself is shared, not copied */
+        new_sl->next = ex;
+        ex = new_sl;
+
+        set_symbol_value(exit_evaluator, sl->string);
+        had_exitvals = 1;
+      }
+    }
+  }
+  
+  this_result_unambiguous = evaluate_result(exit_evaluator, &dfas[ndfa]->result, &dfas[ndfa]->result_early);
+  dfas[ndfa]->nfa_exit_sl = ex; /* kept so that print_dfa() can list the tags */
+
+  if (!this_result_unambiguous) {
+    Stringlist *sl;
+    fprintf(stderr, "WARNING : Ambiguous exit state abandoned for DFA state %d\n", ndfa);
+    fprintf(stderr, "NFA exit tags applying in this stage :\n");
+    for (sl = ex; sl; sl = sl->next) {
+      fprintf(stderr, "  %s\n", sl->string);
+    }
+    had_ambiguous_result = 1;
+  }
+  /*}}}*/
+  /* {{{ Boolean reduction for attributes */
+  ex = NULL;
+  had_exitvals = 0;
+  clear_symbol_values(attr_evaluator);
+  for (j=0; j<N; j++) {
+    if (is_set(dfas[ndfa]->nfas, j)) {
+      Stringlist *sl;
+      State *s = b->states[j];
+      for (sl = s->attributes; sl; sl = sl->next) {
+        Stringlist *new_sl;
+        new_sl = new(Stringlist);
+        new_sl->string = sl->string;
+        new_sl->next = ex;
+        ex = new_sl;
+
+        set_symbol_value(attr_evaluator, sl->string);
+        had_exitvals = 1;
+      }
+    }
+  }
+  this_result_unambiguous = evaluate_result(attr_evaluator, &dfas[ndfa]->attribute, NULL); /* no 'early' flag for attributes */
+  dfas[ndfa]->nfa_attr_sl = ex;
+
+  if (!this_result_unambiguous) {
+    Stringlist *sl;
+    fprintf(stderr, "WARNING : Ambiguous attribute abandoned for DFA state %d\n", ndfa);
+    fprintf(stderr, "NFA attribute tags applying in this stage :\n");
+    for (sl = ex; sl; sl = sl->next) {
+      fprintf(stderr, "  %s\n", sl->string);
+    }
+    had_ambiguous_result = 1;
+  }
+  /*}}}*/
+  
+  ndfa++; /* the state only counts as existing from here on */
+  return result;
+}
+/*}}}*/
+static void clear_nfas(unsigned long *nfas, int N)/*{{{*/
+{ /* Zero the bitmap 'nfas', which has one bit for each of N NFA states. */
+  unsigned long *p = nfas;
+  unsigned long *end = nfas + round_up(N);
+
+  while (p < end) *p++ = 0;
+}
+/*}}}*/
+static void build_dfa(Block *b, int start_index)/*{{{*/
+/* Subset construction : starting from the epsilon closure of NFA state 'start_index', create
+   DFA states (add_dfa) and fill in their transition maps until none is left to process. */
+{
+  unsigned long **nfas;
+  int i;
+  int N, Nt;
+  int next_to_do;
+  int *found_any;
+  int rup_N;
+
+  for (i=0; i<DFA_HASHSIZE; i++) dfa_hashtable[i] = NULL;
+  
+  N = b->nstates;
+  rup_N = round_up(N);
+  Nt = ntokens;
+  
+  /* Add initial state */
+  nfas = new_array(unsigned long *, Nt); /* nfas[t] : set of NFA states reached via token t */
+  for (i=0; i<Nt; i++) {
+    nfas[i] = new_array(unsigned long, round_up(N));
+  }
+  clear_nfas(nfas[0], N);
+  for (i=0; i<round_up(N); i++) {
+    nfas[0][i] |= eclo[start_index][i];
+  }
+  add_dfa(b, nfas[0], N, Nt, -1, -1);
+  next_to_do = 0;
+  found_any = new_array(int, Nt);
+
+  /* Now the heart of the program : the subset construction to turn the NFA
+     into a DFA.  This is a major performance hog in the program, so there are
+     lots of tricks to speed this up (particularly, hoisting intermediate
+     pointer computations out of the loop to assert the fact that there is no
+     aliasing between the arrays.) */
+
+  while (next_to_do < ndfa) {
+    int t; /* token index */
+    int j0, j0_5, j1, j, k;
+    int idx;
+    unsigned long *current_nfas;
+    unsigned long block_bitmap, mask; /* mask is unsigned : it is shifted up through bit 31 */
+    /* If the next DFA state has the result_early flag set, it means that the scanner will
+     * always exit straight away when that state is reached, so there's no need to compute
+     * any transitions out of it. */
+
+    if (dfas[next_to_do]->result_early) {
+      next_to_do++;
+      continue;
+    }
+
+    for (j=0; j<Nt; j++) {
+      clear_nfas(nfas[j], N);
+      found_any[j] = 0;
+    }
+
+    current_nfas = dfas[next_to_do]->nfas;
+    for (j0=0; j0<rup_N; j0++) { /* Loop over NFA states which may be in this DFA state */
+      block_bitmap = current_nfas[j0];
+      if (!block_bitmap) continue;
+      j0_5 = j0 << 5;
+      for (mask=1UL, j1=0; j1<32; mask<<=1, j1++) {
+        j = j0_5 + j1;
+        if (block_bitmap & mask) { /* Is NFA state in DFA */
+          unsigned long **transmap_j = transmap[j];
+          unsigned long *anytrans_j = anytrans[j];
+          for (t=0; t<Nt; t++) { /* Loop over transition symbols */
+            unsigned long *transmap_t;
+            unsigned long *nfas_t;
+            unsigned long found_any_t;
+            if (!is_set(anytrans_j, t)) continue;
+            transmap_t = transmap_j[t];
+            nfas_t = nfas[t];
+            found_any_t = found_any[t];
+            for (k=0; k<rup_N; k++) { /* Loop over destination NFA states */
+              unsigned long x;
+              x = transmap_t[k];
+              nfas_t[k] |= x;
+              found_any_t |= !!x;
+            }
+            found_any[t] = found_any_t;
+          }
+        }
+      }
+    }
+          
+    for (t=0; t<Nt; t++) {
+      if (found_any[t]) {
+        idx = find_dfa(nfas[t], N); /* reuse an existing DFA state with the same NFA set */
+        if (idx < 0) {
+          idx = add_dfa(b, nfas[t], N, Nt, next_to_do, t);
+        }
+      } else {
+        idx = -1; /* no transition on this token */
+      }
+      dfas[next_to_do]->map[t] = idx;
+    }
+
+    next_to_do++;
+  }
+
+  free(found_any);
+  for (i=0; i<Nt; i++) free(nfas[i]);
+  free(nfas);
+}
+/*}}}*/
+/* ================================================================= */
+static void print_dfa(Block *b)/*{{{*/
+/* Describe every DFA state in the report file (does nothing when no report file is open). */
+{
+  int N = b->nstates;
+  int Nt = ntokens;
+  
+  int i, j0, j0_5, j1, t;
+  unsigned long mask;
+  unsigned long current_nfas;
+  int rup_N = round_up(N);
+  int from_state, this_state, via_token, maxtrace;
+
+  if (!report) return;
+  
+  for (i=0; i<ndfa; i++) {
+    fprintf(report, "DFA state %d\n", i);
+    if (dfas[i]->nfas) {
+      fprintf(report, "  NFA states :\n");
+      for (j0=0; j0<rup_N; j0++) { /* one 32-bit word of the bitmap at a time */
+        current_nfas = dfas[i]->nfas[j0];
+        if (!current_nfas) continue;
+        j0_5 = j0<<5; /* number of the NFA state held in bit 0 of this word */
+        for (j1=0, mask=1UL; j1<32; mask<<=1, j1++) {
+          if (current_nfas & mask) {
+            fprintf(report, "    %s\n", b->states[j0_5 + j1]->name);
+          }
+        }
+      }
+      fprintf(report, "\n");
+    }
+    fprintf(report, "  Reverse route :\n    HERE");
+    this_state = i;
+    from_state = dfas[i]->from_state;
+    maxtrace=0;
+    while (from_state >= 0) { /* build_dfa() gives the initial state from_state == -1 */
+      via_token = dfas[this_state]->via_token;
+      fprintf(report, "<-%s", toktable[via_token]);
+      this_state = from_state;
+      from_state = dfas[this_state]->from_state;
+      maxtrace++;
+      if (maxtrace>100) break; /* at most 101 steps of the route are printed */
+    }
+    fprintf(report, "\n");
+    
+    fprintf(report, "  Transitions :\n");
+    for (t=0; t<Nt; t++) {
+      int dest = dfas[i]->map[t];
+      if (dest >= 0) {
+        fprintf(report, "    %s -> %d\n", toktable[t], dest);
+      }
+    }
+    if (dfas[i]->defstate >= 0) {
+      fprintf(report, "  Use state %d as basis (%d fixups)\n",
+              dfas[i]->defstate, dfas[i]->best_diff);
+    }
+    if (dfas[i]->nfa_exit_sl) {
+      Stringlist *sl;
+      fprintf(report, "  NFA exit tags applying :\n");
+      for (sl=dfas[i]->nfa_exit_sl; sl; sl = sl->next) {
+        fprintf(report, "    %s\n", sl->string);
+      }
+    }
+    if (dfas[i]->result) {
+      fprintf(report, "  Exit value : %s\n", dfas[i]->result);
+    }
+    if (dfas[i]->attribute) {
+      fprintf(report, "  Attribute : %s\n", dfas[i]->attribute);
+    }
+
+    fprintf(report, "\n");
+  }
+}
+/*}}}*/
+/* ================================================================= */
+static void print_exitval_table(Block *b)/*{{{*/
+/* Write the exit value of every DFA state to the output file as a C array
+   called <prefix>_exitval (or exitval when there is no prefix).  A state
+   without a result gets the evaluator's default.  'b' is not used. */
+{
+  int i;
+  extern char *prefix;
+  char *defresult = get_defresult(exit_evaluator);
+
+  if (prefix) {
+    fprintf(output, "%s %s_exitval[] = {\n", get_result_type(exit_evaluator), prefix);
+  } else {
+    fprintf(output, "%s exitval[] = {\n", get_result_type(exit_evaluator));
+  }
+  for (i=0; i<ndfa; i++) {
+    fprintf(output, "%s", (dfas[i]->result) ? dfas[i]->result : defresult);
+    fputc ((i<(ndfa-1)) ? ',' : ' ', output);
+    fprintf(output, " /* State %d */\n", i);
+  }
+  fprintf(output, "};\n\n");
+}
+/*}}}*/
+static void print_attribute_table(void)/*{{{*/
+/* Emit the per-DFA-state attribute table as a C array on the output file,
+   substituting the evaluator's default where a state has no attribute. */
+{
+  extern char *prefix;
+  char *fallback = get_defresult(attr_evaluator);
+  int n;
+  if (!prefix) {
+    fprintf(output, "%s attribute[] = {\n", get_result_type(attr_evaluator));
+  } else {
+    fprintf(output, "%s %s_attribute[] = {\n", get_result_type(attr_evaluator), prefix);
+  }
+  for (n=0; n<ndfa; n++) {
+    char *shown = dfas[n]->attribute;
+    fprintf(output, "%s", shown ? shown : fallback);
+    fputc ((n+1 < ndfa) ? ',' : ' ', output);
+    fprintf(output, " /* State %d */\n", n);
+  }
+  fprintf(output, "};\n\n");
+}
+/*}}}*/
+static void write_next_state_function_uncompressed(int Nt)/*{{{*/
+/* Write the next_state function for uncompressed tables into the output file.
+   Nt is the token count (row stride); the parameter is next_token in both forms. */
+{
+  extern char *prefix;
+  if (prefix) {
+    fprintf(output, "int %s_next_state(int current_state, int next_token) {\n", prefix);
+  } else {
+    fprintf(output, "int next_state(int current_state, int next_token) {\n");
+  }
+  fprintf(output, "  if (next_token < 0 || next_token >= %d) return -1;\n", Nt);
+  if (prefix) fprintf(output, "  return %s_trans[%d*current_state + next_token];\n", prefix, Nt);
+  else        fprintf(output, "  return trans[%d*current_state + next_token];\n", Nt);
+  fprintf(output, "}\n");
+}
+/*}}}*/
+static void print_uncompressed_tables(Block *b)/*{{{*/
+/* Print out the state/transition table uncompressed, i.e. every
+   token has an array entry in every state.  This is fast to access
+   but quite wasteful on memory with many states and many tokens.
+   The block argument is not needed (the table comes from the global
+   dfas[] array) but is kept so that callers are unaffected. */
+{
+  int Nt = ntokens;
+  int n, i, j;
+  extern char *prefix;
+
+  (void) b;
+  n = 0; /* Number of entries written so far */
+  if (prefix) {
+    fprintf(output, "static short %s_trans[] = {", prefix);
+  } else {
+    fprintf(output, "static short trans[] = {");
+  }
+  for (i=0; i<ndfa; i++) {
+    for (j=0; j<Nt; j++) {
+      if (n>0) fputc (',', output);
+      if (n%8 == 0) {
+        fprintf(output, "\n  ");
+      } else {
+        fputc(' ', output);
+      }
+      n++;
+      fprintf(output, "%4d", dfas[i]->map[j]);
+    }
+  }
+
+  fprintf(output, "\n};\n\n");
+
+  write_next_state_function_uncompressed(Nt);
+}
+/*}}}*/
+static int check_include_char(int this_state, int token)/*{{{*/
+{ /* Non-zero if (this_state, token) needs its own entry in the compressed tables. */
+  int own = dfas[this_state]->map[token];
+  int basis = dfas[this_state]->defstate;
+  if (basis < 0) {
+    return (own >= 0); /* no default state: keep every real transition */
+  }
+  return (own != dfas[basis]->map[token]); /* keep only differences from the default state */
+}
+/*}}}*/
+static void write_next_state_function_compressed(void)/*{{{*/
+/* Write the next_state function for traversing compressed tables into the
+   output file.  The emitted function bisects the token list of the current
+   state and, on a miss, retries in that state's default state.  Its second
+   parameter must be named next_token, as the emitted body refers to it so. */
+{
+  extern char *prefix;
+  if (prefix) {
+    fprintf(output, "int %s_next_state(int current_state, int next_token) {\n", prefix);
+    fprintf(output, "int h, l, m, xm;\n");
+    fprintf(output, "while (current_state >= 0) {\n");
+    fprintf(output, "  l = %s_base[current_state], h = %s_base[current_state+1];\n", prefix, prefix);
+    fprintf(output, "  while (h > l) {\n");
+    fprintf(output, "    m = (h + l) >> 1; xm = %s_token[m];\n", prefix);
+    fprintf(output, "    if (xm == next_token) goto done;\n");
+    fprintf(output, "    if (m == l) break;\n");
+    fprintf(output, "    if (xm > next_token) h = m;\n");
+    fprintf(output, "    else                 l = m;\n");
+    fprintf(output, "  }\n");
+    fprintf(output, "  current_state = %s_defstate[current_state];\n", prefix);
+    fprintf(output, "}\n");
+    fprintf(output, "return -1;\n");
+    fprintf(output, "done:\n");
+    fprintf(output, "return %s_nextstate[m];\n", prefix);
+    fprintf(output, "}\n");
+  } else {
+    fprintf(output, "int next_state(int current_state, int next_token) {\n");
+    fprintf(output, "int h, l, m, xm;\n");
+    fprintf(output, "while (current_state >= 0) {\n");
+    fprintf(output, "  l = base[current_state], h = base[current_state+1];\n");
+    fprintf(output, "  while (h > l) {\n");
+    fprintf(output, "    m = (h + l) >> 1; xm = token[m];\n");
+    fprintf(output, "    if (xm == next_token) goto done;\n");
+    fprintf(output, "    if (m == l) break;\n");
+    fprintf(output, "    if (xm > next_token) h = m;\n");
+    fprintf(output, "    else                 l = m;\n");
+    fprintf(output, "  }\n");
+    fprintf(output, "  current_state = defstate[current_state];\n");
+    fprintf(output, "}\n");
+    fprintf(output, "return -1;\n");
+    fprintf(output, "done:\n");
+    fprintf(output, "return nextstate[m];\n");
+    fprintf(output, "}\n");
+  }
+}
+/*}}}*/
+static void print_compressed_tables(Block *b)/*{{{*/
+/* Print state/transition table in compressed form.  This is more
+   economical on storage, but requires a bisection search to find
+   the next state for a given current state & token */
+{
+  int *basetab = new_array(int, ndfa+1); /* basetab[i] = offset of state i's first entry */
+  int Nt = ntokens;
+  int n, i, j;
+  extern char *prefix;
+
+  (void) b; /* unused; kept for the sake of callers */
+  if (!basetab) { fprintf(stderr, "Out of memory building compressed tables\n"); exit(1); }
+  n = 0;
+  if (prefix) {
+    fprintf(output, "static unsigned char %s_token[] = {", prefix);
+  } else {
+    fprintf(output, "static unsigned char token[] = {");
+  }
+  for (i=0; i<ndfa; i++) {
+    for (j=0; j<Nt; j++) {
+      if (check_include_char(i, j)) {
+        if (n>0) fputc (',', output);
+        if (n%8 == 0) {
+          fprintf(output, "\n  ");
+        } else {
+          fputc(' ', output);
+        }
+        n++;
+        fprintf(output, "%3d", j);
+      }
+    }
+  }
+  fprintf(output, "\n};\n\n");
+
+  n = 0;
+  if (prefix) {
+    fprintf(output, "static short %s_nextstate[] = {", prefix);
+  } else {
+    fprintf(output, "static short nextstate[] = {");
+  }
+  for (i=0; i<ndfa; i++) {
+    basetab[i] = n;
+    for (j=0; j<Nt; j++) {
+      if (check_include_char(i, j)) {
+        if (n>0) fputc (',', output);
+        if (n%8 == 0) {
+          fprintf(output, "\n  ");
+        } else {
+          fputc(' ', output);
+        }
+        n++;
+        fprintf(output, "%5d", dfas[i]->map[j]);
+      }
+    }
+  }
+  fprintf(output, "\n};\n\n");
+  basetab[ndfa] = n; /* end marker, so that base[i+1] is valid for the last state */
+
+  n = 0;
+  if (prefix) {
+    fprintf(output, "static unsigned short %s_base[] = {", prefix);
+  } else {
+    fprintf(output, "static unsigned short base[] = {");
+  }
+  for (i=0; i<=ndfa; i++) {
+    if (n>0) fputc (',', output);
+    if (n%8 == 0) {
+      fprintf(output, "\n  ");
+    } else {
+      fputc(' ', output);
+    }
+    n++;
+    fprintf(output, "%5d", basetab[i]);
+  }
+  fprintf(output, "\n};\n\n");
+  
+  n = 0;
+  if (prefix) {
+    fprintf(output, "static short %s_defstate[] = {", prefix);
+  } else {
+    fprintf(output, "static short defstate[] = {");
+  }
+  for (i=0; i<ndfa; i++) {
+    if (n>0) fputc (',', output);
+    if (n%8 == 0) {
+      fprintf(output, "\n  ");
+    } else {
+      fputc(' ', output);
+    }
+    n++;
+    fprintf(output, "%5d", dfas[i]->defstate);
+  }
+  fprintf(output, "\n};\n\n");
+  
+  free(basetab);
+
+  write_next_state_function_compressed();
+}
+/*}}}*/
+/* ================================================================= */
+void yyerror (char *s)/*{{{*/
+{ /* Report message s on stderr, followed by the current value of lineno. */
+  extern int lineno;
+  fprintf(stderr, "%s at line %d\n", s, lineno);
+}
+/*}}}*/
+int yywrap(void) /*{{{*/
+{ /* Always returns non-zero (lex convention: no further input to scan). */
+  return -1;
+}
+/*}}}*/
+/* ================================================================= */
+
+int main (int argc, char **argv)
+{
+  int result;
+  State *start_state;
+  Block *main_block;
+  /* Driver: parse options, read the NFA description, build the DFA, write tables. */
+  char *input_name = NULL;
+  char *output_name = NULL;
+  char *report_name = NULL;
+  int uncompressed_tables = 0;
+  int uncompressed_dfa = 0; /* Useful for debug */
+  verbose = 0;
+  report = NULL;
+
+  /*{{{ Parse cmd line arguments */
+  while (++argv, --argc > 0) { /* in the body, argc counts this argument plus those still to come */
+    if (!strcmp(*argv, "-v") || !strcmp(*argv, "--verbose")) {
+      verbose = 1;
+    } else if (!strcmp(*argv, "-o") || !strcmp(*argv, "--output")) {
+      if (argc < 2) { fprintf(stderr, "Option %s requires a filename, exiting\n", *argv); exit(1); }
+      ++argv, --argc; output_name = *argv;
+    } else if (!strcmp(*argv, "-r") || !strcmp(*argv, "--report")) {
+      if (argc < 2) { fprintf(stderr, "Option %s requires a filename, exiting\n", *argv); exit(1); }
+      ++argv, --argc; report_name = *argv;
+    } else if (!strcmp(*argv, "-u") || !strcmp(*argv, "--uncompressed-tables")) {
+      uncompressed_tables = 1;
+    } else if (!strcmp(*argv, "-ud") || !strcmp(*argv, "--uncompressed-dfa")) {
+      uncompressed_dfa = 1;
+    } else if ((*argv)[0] == '-') {
+      fprintf(stderr, "Unrecognized command line option %s\n", *argv);
+    } else {
+      input_name = *argv;
+    }
+  }
+  /*}}}*/
+
+  if (input_name) {/*{{{*/
+    input = fopen(input_name, "r");
+    if (!input) {
+      fprintf(stderr, "Can't open %s for input, exiting\n", input_name);
+      exit(1);
+    }
+  } else {
+    input = stdin;
+  }
+  /*}}}*/
+  if (output_name) {/*{{{*/
+    output = fopen(output_name, "w");
+    if (!output) {
+      fprintf(stderr, "Can't open %s for writing, exiting\n", output_name);
+      exit(1);
+    }
+  } else {
+    output = stdout;
+  }
+/*}}}*/
+  if (report_name) {/*{{{*/
+    report = fopen(report_name, "w");
+    if (!report) {
+      fprintf(stderr, "Can't open %s for writing, no report will be created\n", report_name);
+    }
+  }
+/*}}}*/
+
+  if (verbose) {
+    fprintf(stderr, "General-purpose automaton builder\n");
+    fprintf(stderr, "Copyright (C) Richard P. Curnow  2000-2001\n");
+  }
+
+  eval_initialise();
+  
+  if (verbose) fprintf(stderr, "Parsing input...");
+  yyin = input;
+  
+  /* Set yyout.  This means that if anything leaks from the scanner, or appears
+     in a %{ .. %} block, it goes to the right place. */
+  yyout = output; 
+ 
+  result = yyparse();
+  if (result > 0) exit(1);
+  if (verbose) fprintf(stderr, "\n");
+  start_state = get_curstate(); /* The last state to be current in the input file is the entry state of the NFA */
+  if (!start_state) { fprintf(stderr, "No states defined in input, exiting\n"); exit(1); }
+  main_block = start_state->parent;
+  if (verbose) fprintf(stderr, "Computing epsilon closure...\n");
+  generate_epsilon_closure(main_block);
+  print_nfa(main_block);
+#if 0
+  if (verbose) fprintf(stderr, "Compressing NFA...\n");
+  compress_nfa(main_block);
+#endif
+  build_transmap(main_block);
+  if (verbose) fprintf(stderr, "Building DFA...\n");
+  build_dfa(main_block, start_state->index);
+  if (report) {
+    fprintf(report, "--------------------------------\n"
+                    "DFA structure before compression\n"
+                    "--------------------------------\n");
+  }
+  print_dfa(main_block);
+  
+  if (had_ambiguous_result) {
+    fprintf(stderr, "No output written, there were ambiguous exit values for accepting states\n");
+    exit(2);
+  }
+  
+  if (!uncompressed_dfa) {
+    if (verbose) fprintf(stderr, "\nCompressing DFA...\n");
+    ndfa = compress_dfa(dfas, ndfa, ntokens);
+  }
+
+  if (verbose) fprintf(stderr, "\nCompressing transition tables...\n");
+  compress_transition_table(dfas, ndfa, ntokens);
+
+  if (report) {
+    fprintf(report, "-------------------------------\n"
+                    "DFA structure after compression\n"
+                    "-------------------------------\n");
+  }
+  if (verbose) fprintf(stderr, "Writing outputs...\n");
+  print_dfa(main_block);
+
+  print_exitval_table(main_block);
+  print_attribute_table();
+
+  if (uncompressed_tables) {
+    print_uncompressed_tables(main_block);
+  } else {
+    print_compressed_tables(main_block);
+  }
+
+  if (report) {
+    fclose(report);
+    report = NULL;
+  }
+  
+  return result;
+}
diff -urN jbofihe-0.36/dfasyn/n2d.h jbofihe-0.37/dfasyn/n2d.h
--- jbofihe-0.36/dfasyn/n2d.h	Thu Jan  1 01:00:00 1970
+++ jbofihe-0.37/dfasyn/n2d.h	Wed Aug  8 22:41:52 2001
@@ -0,0 +1,205 @@
+/***************************************
+  $Header: /cvs/src/jbofihe/dfasyn/n2d.h,v 1.1 2001/07/12 21:15:35 richard Exp $
+
+  Header file for NFA->DFA conversion utility.
+  ***************************************/
+
+/* Copyright (C) Richard P. Curnow  2000-2001 */
+/*
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * 
+*/
+
+#ifndef N2D_H
+#define N2D_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define new(T) ((T *) malloc(sizeof(T)))
+#define new_array(T,N) ((T *) malloc((N) * sizeof(T)))
+#define resize_array(T,arr,newN) ((T *) ((arr) ? realloc(arr,(newN)*sizeof(T)) : malloc((newN)*sizeof(T))))
+#define new_string(s) strcpy((char *)malloc((strlen(s)+1)*sizeof(char)),s)
+
+/* For typecasting, especially useful for declarations of local ptrs to args
+   of a qsort comparison fn.  Castdecl declares nx as x converted to type T. */
+#define Castdecl(x, T, nx) T nx = (T) (x)
+/* Castderef declares nx as the T that x points at.  x is parenthesised in both. */
+#define Castderef(x, T, nx) T nx = *(T*) (x)
+
+/* Globally visible options to control reporting */
+extern FILE *report;
+extern int verbose;
+
+struct State;
+struct Block;
+
+typedef struct Translist { /* Node in a singly-linked list of transitions (see State.transitions) */
+  struct Translist *next;
+  int token; /* NOTE(review): presumably the token code triggering the transition -- confirm */
+  char *ds_name; /* NOTE(review): looks like the destination state's name -- confirm */
+  struct State *ds_ref; /* NOTE(review): presumably the resolved destination state -- confirm */
+} Translist;
+
+typedef struct Stringlist {
+  struct Stringlist *next;
+  char *string;
+} Stringlist;
+  
+typedef struct State {
+  char *name;
+  int index; /* Array index in containing block */
+  struct Block *parent;
+  Translist *transitions;
+  Stringlist *exitvals;
+  Stringlist *attributes;
+
+  /* Pointers to the nodes in the 'transitions' list, sorted into canonical order */
+  Translist **ordered_trans;
+  int n_transitions;
+
+  unsigned char removed; /* Flag indicating state has been pruned by compression stage */
+} State;
+
+typedef struct S_Stateset { /* Array of state pointers; used for the hash buckets in Block */
+  State **states;
+  int nstates; /* NOTE(review): presumably the number of entries in use -- confirm */
+  int maxstates; /* NOTE(review): presumably the allocated capacity -- confirm */
+} Stateset;
+
+#define HASH_BUCKETS 64
+#define HASH_MASK (HASH_BUCKETS-1)
+
+typedef struct Block {
+  char *name;
+
+  /* The master table of states within this block.  This has to be in a flat
+     array because we have to work with respect to state indices when doing the
+     2D bitmap stuff for the subset construction. */
+  State **states;
+  int nstates;
+  int maxstates;
+  
+  /* Hash table for getting rapid access to a state within the block, given
+     its name */
+  Stateset state_hash[HASH_BUCKETS];
+  
+  int subcount; /* Number for generating substates */
+} Block;
+
+typedef struct { /* One DFA state; the later fields are filled in by compdfa.c and tabcompr.c */
+  unsigned long *nfas;
+  unsigned long signature; /* All the longwords in the nfas array xor'ed together */
+  int index; /* Entry's own index in the array */
+  int *map; /* index by token code */
+  int from_state; /* the state which provided the first transition to this one (leading to its creation) */
+  int via_token; /* the token through which we got to this state the first time. */
+  Stringlist *nfa_exit_sl; /* NFA exit values */
+  Stringlist *nfa_attr_sl; /* NFA attribute values -- NOTE(review): inferred from the field name, confirm */
+  char *result;    /* Result token, computed by boolean expressions defined in input text */
+  int result_early; /* If !=0, the scanner is expected to exit immediately this DFA state is entered.
+                       It means that no out-bound transitions have to be created. */
+  char *attribute; /* Attribute token, computed by boolean expressions defined in input text */
+
+  /* Fields calculated in compdfa.c */
+  
+  /* The equivalence class the state is in. */
+  int eq_class;
+
+  /* Temp. storage for the new eq. class within a single pass of the splitting alg. */
+  int new_eq_class; 
+
+  /* Signature field from above is also re-used. */
+
+  int is_rep; /* Set if state is chosen as the representative of its equivalence class. */
+  int new_index; /* New index assigned to the state. */
+
+  /* Fields calculated in tabcompr.c */
+  
+  unsigned long transition_sig;
+
+  /* Default state, i.e. the one that supplies transitions for tokens not
+     explicitly listed for this one. */
+  int defstate; 
+
+  /* Number of transitions that this state has different to those in the
+     default state. */
+  int best_diff; 
+
+} DFANode;
+
+
+/* Constants for 'create' args */  
+#define USE_OLD_MUST_EXIST 0
+#define CREATE_MUST_NOT_EXIST 1
+#define CREATE_OR_USE_OLD 2
+
+State *get_curstate(void);
+
+struct Abbrev;
+extern struct Abbrev * create_abbrev(char *name);
+extern void add_tok_to_abbrev(struct Abbrev *abbrev, char *tok);
+
+int lookup_token(char *name, int create);
+Block *lookup_block(char *name, int create);
+State *lookup_state(Block *in_block, char *name, int create);
+Stringlist * add_token(Stringlist *existing, char *token);
+void add_transitions(State *curstate, Stringlist *tokens, char *destination);
+State * add_transitions_to_internal(Block *curblock, State *addtostate, Stringlist *tokens);
+void add_exit_value(State *curstate, char *value);
+void set_state_attribute(State *curstate, char *name);
+void instantiate_block(Block *curblock, char *block_name, char *instance_name);
+void fixup_state_refs(Block *b);
+
+void compress_nfa(Block *b);
+
+/* In expr.c */
+typedef struct Expr Expr;
+
+typedef struct evaluator Evaluator;
+extern Evaluator *exit_evaluator;
+extern Evaluator *attr_evaluator;
+
+Expr * new_wild_expr(void);
+Expr * new_not_expr(Expr *c);
+Expr * new_and_expr(Expr *c1, Expr *c2);
+Expr * new_or_expr(Expr *c1, Expr *c2);
+Expr * new_xor_expr(Expr *c1, Expr *c2);
+Expr * new_cond_expr(Expr *c1, Expr *c2, Expr *c3);
+Expr * new_sym_expr(char *sym_name);
+
+void define_symbol(Evaluator *x, char *name, Expr *e);
+void define_result(Evaluator *x, char *string, Expr *e, int early);
+void define_symresult(Evaluator *x, char *string, Expr *e, int early);
+void define_defresult(Evaluator *x, char *string);
+void clear_symbol_values(Evaluator *x);
+void set_symbol_value(Evaluator *x, char *sym_name);
+int evaluate_result(Evaluator *x, char **, int *);
+void define_defresult(Evaluator *x, char *text);
+void define_type(Evaluator *x, char *text);
+char* get_defresult(Evaluator *x);
+char* get_result_type(Evaluator *x);
+void eval_initialise(void);
+
+void compress_transition_table(DFANode **dfas, int ndfas, int ntokens);
+unsigned long increment(unsigned long x, int field);
+unsigned long count_bits_set(unsigned long x);
+
+/* Return new number of DFA states */
+int compress_dfa(DFANode **dfas, int ndfas, int ntokens);
+
+#endif /* N2D_H */
+
diff -urN jbofihe-0.36/dfasyn/parse.y jbofihe-0.37/dfasyn/parse.y
--- jbofihe-0.36/dfasyn/parse.y	Thu Jan  1 01:00:00 1970
+++ jbofihe-0.37/dfasyn/parse.y	Wed Aug  8 22:41:52 2001
@@ -0,0 +1,170 @@
+/**********************************************************************
+  $Header: /cvs/src/jbofihe/dfasyn/parse.y,v 1.1 2001/07/12 21:15:35 richard Exp $
+
+  Grammar definition for input files defining an NFA
+
+ *********************************************************************/
+
+/* Copyright (C) Richard P. Curnow  2000-2001 */
+/*
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * 
+*/
+
+%{
+#include "n2d.h"
+
+static Block *curblock = NULL; /* Current block being built */
+static State *curstate = NULL; /* Current state being worked on */
+static State *addtostate = NULL; /* Current state (incl ext) to which transitions are added */
+static struct Abbrev *curabbrev = NULL; /* Current definition being worked on */
+static Stringlist *curtranslist = NULL; /* Transition list prior to ARROW */
+
+/* Prefix set by prefix command */
+char *prefix = NULL;
+
+State *get_curstate(void) { return curstate; }
+
+%}
+
+%union {
+    char *s;
+    int i;
+    Stringlist *sl;
+    Expr *e;
+}
+
+%token STRING STATE TOKENS PREFIX ARROW BLOCK ENDBLOCK COLON EQUAL SEMICOLON COMMA
+%token ABBREV DEFINE 
+%type<s> STRING option
+%type<sl> option_seq transition_seq
+%type<e> expr
+
+%token RESULT SYMBOL SYMRESULT DEFRESULT
+%token EARLYRESULT EARLYSYMRESULT
+%token TYPE
+%token ATTR DEFATTR
+%token STAR
+%right QUERY COLON
+%left PIPE
+%left XOR
+%left AND
+%left NOT
+%left LPAREN RPAREN
+
+%%
+
+all : decl_seq ;
+
+decl_seq : /* empty */ | decl_seq decl ;
+
+decl : block_decl | tokens_decl | prefix_decl | abbrev_decl | result_decl | attr_decl ;
+
+/* Don't invalidate curstate at the end, this is the means of working out the
+   starting state of the NFA */
+block_decl : block1 block2 { fixup_state_refs(curblock); curblock = NULL; } ;
+
+block1 : BLOCK STRING { curblock = lookup_block($2, CREATE_MUST_NOT_EXIST); addtostate = curstate = NULL; } ;
+
+block2 : instance_decl_seq state_decl_seq ENDBLOCK ;
+
+prefix_decl : PREFIX STRING { prefix = $2; };
+
+tokens_decl : TOKENS token_seq ;
+
+abbrev_decl : ABBREV STRING { curabbrev = create_abbrev($2); }
+              EQUAL string_pipe_seq
+            ;
+
+token_seq : token_seq token | token ;
+
+string_pipe_seq : string_pipe_seq PIPE STRING { add_tok_to_abbrev(curabbrev, $3); }
+                |                      STRING { add_tok_to_abbrev(curabbrev, $1); }
+                ;
+
+token : STRING { (void) lookup_token($1, CREATE_MUST_NOT_EXIST); } ; /* declare a new token name */
+
+instance_decl_seq : /* empty */ | instance_decl_seq instance_decl ;
+
+state_decl_seq : /* empty */ | state_decl_seq state_decl ;
+
+state_decl : STATE STRING { addtostate = curstate = lookup_state(curblock, $2, CREATE_OR_USE_OLD); }
+             opt_state_attribute
+             sdecl_seq ;
+
+opt_state_attribute : LPAREN STRING RPAREN
+                      { set_state_attribute(curstate, $2); }
+                    | /* empty */
+                    ;
+
+sdecl_seq : /* empty */ | sdecl_seq sdecl ;
+
+sdecl : transition_decl ;
+
+instance_decl : STRING COLON STRING { instantiate_block(curblock, $3 /* master_block_name */, $1 /* instance_name */ ); } ;
+
+transition_decl : transition_seq ARROW { curtranslist = $1; } destination_seq { addtostate = curstate; }
+                | transition_seq EQUAL STRING { addtostate = add_transitions_to_internal(curblock, addtostate, $1);
+                                                add_exit_value(addtostate, $3);
+                                                addtostate = curstate; }
+                ;
+
+destination_seq : STRING                       { add_transitions(addtostate, curtranslist, $1); }
+                | destination_seq COMMA STRING { add_transitions(addtostate, curtranslist, $3); }
+                ;
+
+transition_seq : option_seq { $$ = $1; }
+               | transition_seq SEMICOLON option_seq { addtostate = add_transitions_to_internal(curblock, addtostate, $1); $$ = $3; }
+               ;
+
+option_seq : option { $$ = add_token(NULL, $1); }
+           | option_seq PIPE option { $$ = add_token($1, $3); } ;
+
+option : STRING 
+       | /* empty */ { $$ = NULL; }
+       ;
+
+result_decl : RESULT STRING               { define_result(exit_evaluator, $2, NULL, 0); }
+            | RESULT    expr ARROW STRING { define_result(exit_evaluator, $4, $2, 0); }
+            | EARLYRESULT STRING            { define_result(exit_evaluator, $2, NULL, 1); }
+            | EARLYRESULT expr ARROW STRING { define_result(exit_evaluator, $4, $2, 1); }
+            | SYMRESULT expr ARROW STRING { define_symresult(exit_evaluator, $4, $2, 0); }
+            | EARLYSYMRESULT expr ARROW STRING { define_symresult(exit_evaluator, $4, $2, 1); }
+            | SYMBOL STRING EQUAL expr    { define_symbol(exit_evaluator, $2, $4); }
+            | DEFRESULT STRING            { define_defresult(exit_evaluator, $2); }
+            | TYPE STRING                 { define_type(exit_evaluator, $2); }
+            ;
+
+/* No 'early exit' form for attributes.  They are supposed to be actions that
+   are done en-route to the final exit condition. */
+attr_decl : ATTR RESULT STRING               { define_result(attr_evaluator, $3, NULL, 0); }
+          | ATTR RESULT    expr ARROW STRING { define_result(attr_evaluator, $5, $3, 0); }
+          | ATTR SYMRESULT expr ARROW STRING { define_symresult(attr_evaluator, $5, $3, 0); }
+          | ATTR SYMBOL STRING EQUAL expr    { define_symbol(attr_evaluator, $3, $5); }
+          | ATTR DEFRESULT STRING            { define_defresult(attr_evaluator, $3); }
+          | DEFATTR STRING                   { define_defresult(attr_evaluator, $2); }
+          | ATTR TYPE STRING                 { define_type(attr_evaluator, $3); }
+          ;
+
+expr : NOT expr { $$ = new_not_expr($2); }
+     | expr AND expr { $$ = new_and_expr($1, $3); }
+     | expr PIPE /* OR */ expr { $$ = new_or_expr($1, $3); }
+     | expr XOR expr { $$ = new_xor_expr($1, $3); }
+     | expr QUERY expr COLON expr { $$ = new_cond_expr($1, $3, $5); }
+     | LPAREN expr RPAREN { $$ = $2; }
+     | STRING { $$ = new_sym_expr($1); }
+     | STAR { $$ = new_wild_expr(); }
+     ;
+
diff -urN jbofihe-0.36/dfasyn/scan.l jbofihe-0.37/dfasyn/scan.l
--- jbofihe-0.36/dfasyn/scan.l	Thu Jan  1 01:00:00 1970
+++ jbofihe-0.37/dfasyn/scan.l	Wed Aug  8 22:41:52 2001
@@ -0,0 +1,96 @@
+/**********************************************************************
+  $Header: /cvs/src/jbofihe/dfasyn/scan.l,v 1.1 2001/07/12 21:15:35 richard Exp $
+
+  Lexical analyser definition for input files defining an NFA
+
+ *********************************************************************/
+
+/* Copyright (C) Richard P. Curnow  2000-2001 */
+/*
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * 
+*/
+
+%{
+#include "n2d.h"
+#include "parse.h"
+
+int lineno = 1;
+%}
+
+%x PASSTHRU
+%x STR
+
+%%
+
+STATE|State|state           { return STATE; }
+ABBREV|Abbrev|abbrev        { return ABBREV; }
+DEFINE|Define|define        { return DEFINE; }
+TOKENS|Tokens|tokens        { return TOKENS; }
+PREFIX|Prefix|prefix        { return PREFIX; }
+BLOCK|Block|block           { return BLOCK; }
+ENDBLOCK|EndBlock           { return ENDBLOCK; }
+Endblock|endblock           { return ENDBLOCK; }
+TYPE|Type|type              { return TYPE; }
+RESULT|Result|result        { return RESULT; }
+EARLYRESULT                 { return EARLYRESULT; }
+EarlyResult                 { return EARLYRESULT; }
+Earlyresult                 { return EARLYRESULT; }
+earlyresult                 { return EARLYRESULT; }
+ATTRIBUTE|ATTR              { return ATTR; }
+Attribute|Attr              { return ATTR; }
+attribute|attr              { return ATTR; }
+DEFATTR|DefAttr             { return DEFATTR; }
+Defattr|defattr             { return DEFATTR; }
+DEFRESULT|DefResult         { return DEFRESULT; }
+Defresult|defresult         { return DEFRESULT; }
+SYMBOL|Symbol|symbol        { return SYMBOL; }
+SYMRESULT|SymResult         { return SYMRESULT; }
+Symresult|symresult         { return SYMRESULT; }
+EARLYSYMRESULT              { return EARLYSYMRESULT; }
+EarlySymResult              { return EARLYSYMRESULT; }
+EarlySymresult              { return EARLYSYMRESULT; }
+Earlysymresult              { return EARLYSYMRESULT; }
+earlysymresult              { return EARLYSYMRESULT; }
+[A-Za-z0-9_.]+              { yylval.s = new_string(yytext); return STRING; }
+\#.*$                       { /* strip comments */ }
+\-\>                        { return ARROW; }
+=                           { return EQUAL; }
+\|                          { return PIPE; /* OR */ }
+\&                          { return AND; }
+\~                          { return NOT; }
+\!                          { return NOT; }
+\^                          { return XOR; }
+\*                          { return STAR; }
+\?                          { return QUERY; }
+\:                          { return COLON; }
+\;                          { return SEMICOLON; }
+\(                          { return LPAREN; }
+\)                          { return RPAREN; }
+\,                          { return COMMA; }
+\n                          { lineno++; }
+[ \t]+                      { /* ignore */ }
+^\%\{[ \t]*\n               { BEGIN PASSTHRU; }
+\"                          { BEGIN STR; }
+
+<PASSTHRU>^\%\}[ \t]*\n     { BEGIN INITIAL; }
+<PASSTHRU>\n                { fputs(yytext, yyout); lineno++; }
+<PASSTHRU>.+                { fputs(yytext, yyout); }
+
+<STR>\"                     { BEGIN INITIAL; }
+<STR>[^"]*                  { char *p; for (p=yytext; *p; p++) if (*p == '\n') lineno++; /* keep lineno right across multi-line strings */ yylval.s = new_string(yytext); return STRING; }
+
+
+
diff -urN jbofihe-0.36/dfasyn/tabcompr.c jbofihe-0.37/dfasyn/tabcompr.c
--- jbofihe-0.36/dfasyn/tabcompr.c	Thu Jan  1 01:00:00 1970
+++ jbofihe-0.37/dfasyn/tabcompr.c	Wed Aug  8 22:41:52 2001
@@ -0,0 +1,181 @@
+/***************************************
+  $Header: /cvs/src/jbofihe/dfasyn/tabcompr.c,v 1.1 2001/07/12 21:15:35 richard Exp $
+
+  Routines to compress the DFA transition tables, by identifying where two DFA
+  states have a lot of transitions the same.
+  ***************************************/
+
+/* Copyright (C) Richard P. Curnow  2001 */
+/*
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * 
+*/
+
+#include "n2d.h"
+
+/* ================================================================= */
+/* Treat 'x' as a set of 16 bit pairs, with field (0..15) specifying
+   which.  Increment the field'th bit pair as a gray code, in the
+   pattern 00->01->11->10->00 */
+
+unsigned long increment(unsigned long x, int field)
+{
+  /* XOR mask, indexed by current pair value, giving 00->01->11->10->00. */
+  static unsigned char step[4] = {1, 2, 2, 1};
+  int shift = 2 * field; /* bit position of the chosen pair */
+  unsigned long flip = step[(x >> shift) & 3];
+  return x ^ (flip << shift);
+}
+
+/* ================================================================= */
+/* Calculate the number of bits set in the low 32 bits of an unsigned long. */
+
+unsigned long count_bits_set(unsigned long x)
+{
+  unsigned long y = x;
+  unsigned long c;
+  /* Sum adjacent fields pairwise: single bits into 2-bit, then 4-bit counts. */
+  c = 0x55555555UL;
+  y = ((y>>1) & c) + (y & c);
+  c = 0x33333333UL;
+  y = ((y>>2) & c) + (y & c);
+  y = ((y>>4) + y) & 0x0f0f0f0fUL; /* per-byte counts */
+  y = (y>>8) + y;
+  y = (y>>16) + y;
+  /* The total lies in 0..32, so 6 bits are needed; 0x1f would turn 32 into 0. */
+  return y & 0x3f;
+}
+
+/* ================================================================= */
+/* Compute 'signatures' of the transitions out of a particular state.
+   The signature is given by considering the destination state numbers mod 16,
+   and counting how many transitions there are in each resulting equivalence
+   class.  The number is encoded using the gray code implied by the increment
+   fn. */
+
+static void
+compute_transition_sigs(DFANode **dfas, int ndfas, int ntokens)
+{
+  int state, tok;
+  for (state = 0; state < ndfas; state++) {
+    DFANode *node = dfas[state];
+    unsigned long sig = 0UL; /* signature built up for this state */
+    for (tok = 0; tok < ntokens; tok++) {
+      unsigned long target = node->map[tok]; /* destination state number */
+      sig = increment(sig, target & 0xf); /* mod 16 selects the bit pair */
+    }
+    node->transition_sig = sig;
+  }
+}
+
+
+/* ================================================================= */
+
+#define REQUIRED_BENEFIT 2
+/* Choose, for each state, an earlier non-defaulted state to act as its default. */
+static void
+find_default_states(DFANode **dfas, int ndfas, int ntokens)
+{
+  int i, j, t;
+  int best_index;
+  int best_diff;
+  int trans_count; /* Number of transitions in working state */
+  unsigned long tsi;
+  /* A candidate must differ in fewer than trans_count - REQUIRED_BENEFIT entries. */
+  for (i=0; i<ndfas; i++) {
+    trans_count = 0;
+    for (t=0; t<ntokens; t++) {
+      if (dfas[i]->map[t] >= 0) trans_count++;
+    }
+    /* Scan states 0..i-1 for the candidate with the fewest differing entries. */
+    dfas[i]->defstate = -1; /* not defaulted */
+    best_index = -1;
+    best_diff = ntokens + 1; /* Worse than any computed value */
+    tsi = dfas[i]->transition_sig;
+    for (j=0; j<i; j++) {
+      unsigned long tsj;
+      unsigned long sigdiff;
+      int diffsize;
+
+      if (dfas[j]->defstate >= 0) continue; /* Avoid chains of defstates */
+      tsj = dfas[j]->transition_sig;
+
+      /* This is the heart of the technique : if we xor two vectors of bit
+         pairs encoded with the gray code above, and count the number of bits
+         set in the result, we get the sum of absolute differences of the bit
+         pairs.   The number of outgoing transitions that differ between the
+         states must be _at_least_ this value.  It may in fact be much greater
+         (i.e. we may get 'false matches').  However, this algorithm is a quick
+         way of filtering most of the useless potential default states out. */
+      
+      sigdiff = tsi ^ tsj;
+      diffsize = count_bits_set(sigdiff);
+      if (diffsize >= best_diff) continue;
+      if (diffsize >= trans_count) continue; /* Else pointless! */
+
+      /* Otherwise, do an exact check (i.e. see how much false matching we
+         suffered). */
+      diffsize = 0;
+      for (t=0; t<ntokens; t++) {
+        if (dfas[i]->map[t] != dfas[j]->map[t]) {
+          diffsize++;
+        }
+      }
+
+      if (((best_index < 0) || (diffsize < best_diff))
+          &&
+          (diffsize < (trans_count - REQUIRED_BENEFIT))) {
+        best_index = j;
+        best_diff = diffsize;
+      }
+    }
+    /* best_index is still -1 (and best_diff ntokens+1) if nothing qualified. */
+    dfas[i]->defstate = best_index;
+    dfas[i]->best_diff = best_diff;
+  }
+}
+
+/* ================================================================= */
+/* Compute every state's transition signature, then choose default states. */
+void
+compress_transition_table(DFANode **dfas, int ndfas, int ntokens)
+{
+  compute_transition_sigs(dfas, ndfas, ntokens);
+  find_default_states(dfas, ndfas, ntokens);
+}
+
+/* ================================================================= */
+/* Self-test, compiled only when TEST is defined. */
+#ifdef TEST
+int main () {
+  unsigned long x = 0;
+  unsigned long x1, x2, x3, x4;
+  x1 = increment(x,  2);
+  x2 = increment(x1, 2);
+  x3 = increment(x2, 2);
+  x4 = increment(x3, 2);
+  printf("%lu %lu %lu %lu %lu\n", x, x1, x2, x3, x4);
+  /* Expected above: 0 16 48 32 0.  Below, each label is the expected count. */
+  printf("1=%lu\n", count_bits_set(0x00000001));
+  printf("2=%lu\n", count_bits_set(0x00000003));
+  printf("3=%lu\n", count_bits_set(0x00000007));
+  printf("4=%lu\n", count_bits_set(0x0000000f));
+  printf("4=%lu\n", count_bits_set(0xf0000000));
+  printf("32=%lu\n", count_bits_set(0xffffffffUL));
+  return 0;
+}
+#endif
+
+  
diff -urN jbofihe-0.36/dict2inc.pl jbofihe-0.37/dict2inc.pl
--- jbofihe-0.36/dict2inc.pl	Mon Mar 26 22:03:05 2001
+++ jbofihe-0.37/dict2inc.pl	Wed Aug  8 22:41:47 2001
@@ -1,6 +1,6 @@
 #!/usr/bin/env perl
 
-# $Header: /cvs/src/jbofihe/dict2inc.pl,v 1.1 2001/03/04 23:01:18 richard Exp $
+# $Header: /cvs/src/jbofihe/dict2inc.pl,v 1.2 2001/06/19 20:54:06 richard Exp $
 
 # Script to read in a load of dictionary data, sort it, and write it out in the
 # form of a C structure initialisation.  This can be used for embedding the
@@ -33,7 +33,10 @@
     }
 
     if (/^([^:]+):([^:]+)/) {
-        $def{$1} = $2;
+        my $word = $1;
+        my $def = $2;
+        $def =~ s/\"/\\"/go;
+        $def{$word} = $def;
     }
 }
 
diff -urN jbofihe-0.36/doskit.be jbofihe-0.37/doskit.be
--- jbofihe-0.36/doskit.be	Mon Mar 26 22:03:05 2001
+++ jbofihe-0.37/doskit.be	Wed Aug  8 22:41:47 2001
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-# $Header: /cvs/src/jbofihe/doskit.be,v 1.6 2001/03/20 21:25:43 richard Exp $
+# $Header: /cvs/src/jbofihe/doskit.be,v 1.7 2001/07/31 20:46:12 richard Exp $
 
 # The flow for building the DOS binary-only release is
 # 1. Make usual Unix source release
@@ -9,6 +9,17 @@
 # 4. Boot into DOS and make the executables
 # 5. Run some basic sanity checks
 # 6. Reboot Linux and run this script
+
+if [ ! -d /dosc/jbofihe ]; then
+    echo "You need to mount /dosc first!"
+    exit 1
+fi
+
+make jbofihe.txt
+make cmafihe.txt
+make smujajgau.txt
+make jvocuhadju.txt
+make vlatai.txt
 
 cp /dosc/jbofihe/jbofihe.exe .
 cp /dosc/jbofihe/cmafihe.exe .
diff -urN jbofihe-0.36/doskit.fe jbofihe-0.37/doskit.fe
--- jbofihe-0.36/doskit.fe	Mon Mar 26 22:03:05 2001
+++ jbofihe-0.37/doskit.fe	Wed Aug  8 22:41:47 2001
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-# $Header: /cvs/src/jbofihe/doskit.fe,v 1.8 2001/03/20 21:25:43 richard Exp $
+# $Header: /cvs/src/jbofihe/doskit.fe,v 1.10 2001/07/31 20:46:12 richard Exp $
 
 # The flow for building the DOS binary-only release is
 # 1. Make usual Unix source release
@@ -34,15 +34,10 @@
 make morf_lex.c
 make morfvlex.c
 make morf_dfa.c
-make morfnc_dfa.c
+make bctables.c
 make trctabs.c
 make trcftabs.c
 make elitabs.c
-make jbofihe.txt
-make cmafihe.txt
-make smujajgau.txt
-make jvocuhadju.txt
-make vlatai.txt
 
 cp *.c *.h smujmaji.dat canonluj.inc Makefile.dos /dosc/jbofihe
 
diff -urN jbofihe-0.36/extradict jbofihe-0.37/extradict
--- jbofihe-0.36/extradict	Mon Mar 26 22:03:05 2001
+++ jbofihe-0.37/extradict	Wed Aug  8 22:41:47 2001
@@ -1,5 +1,5 @@
 ################################################################################
-# $Header: /cvs/src/jbofihe/extradict,v 1.26 2000/12/05 23:09:28 richard Exp $
+# $Header: /cvs/src/jbofihe/extradict,v 1.28 2001/08/08 21:33:08 richard Exp $
 #
 # This file contains a number of additions to the glossing dictionary extending
 # what the LLG files at ftp://xiron.pc.helsinki.fi/pub/lojban/wordlists
@@ -104,6 +104,8 @@
 ja'a:{AFFIRM}
 zo'u:({- as for)
 vau: :end simple bridi
+xa'o:insufficient
+za'o:excessive
 
 cai:extreme
 sai:strong
@@ -345,3 +347,16 @@
 +re'u:ROI
 +pe'a:UI
 +po'a:UI
+
+# Letters
+a.bu:"a"
+e.bu:"e"
+i.bu:"i"
+o.bu:"o"
+u.bu:"u"
+y'y.bu:"h"
+ky.bu:"q"
+vy.bu:"w"
+denpa.bu:"."
+slaku.bu:","
+
diff -urN jbofihe-0.36/functions.h jbofihe-0.37/functions.h
--- jbofihe-0.36/functions.h	Mon Mar 26 22:03:05 2001
+++ jbofihe-0.37/functions.h	Wed Aug  8 22:41:47 2001
@@ -1,5 +1,5 @@
 /***************************************
-  $Header: /cvs/src/jbofihe/functions.h,v 1.12 2001/03/09 22:18:55 richard Exp $
+  $Header: /cvs/src/jbofihe/functions.h,v 1.13 2001/06/22 22:17:14 richard Exp $
 
   Prototypes for functions
   ***************************************/
@@ -144,13 +144,13 @@
 
 /* In translate.c */
 char * translate(char *word);
-char *translate_unknown(char *w, int place);
 typedef enum {
   TCX_NOUN = 0,
   TCX_VERB = 1,
   TCX_QUAL = 2,
   TCX_TAG = 3
 } TransContext;
+char *translate_unknown(char *w, int place, TransContext ctx);
 char *adv_translate(char *w, int place, TransContext ctx);
 
 /* In output.c */
diff -urN jbofihe-0.36/htmlout.c jbofihe-0.37/htmlout.c
--- jbofihe-0.36/htmlout.c	Mon Mar 26 22:03:05 2001
+++ jbofihe-0.37/htmlout.c	Wed Aug  8 22:41:47 2001
@@ -1,5 +1,5 @@
 /***************************************
-  $Header: /cvs/src/jbofihe/htmlout.c,v 1.1 1999/06/12 08:23:10 richard Exp $
+  $Header: /cvs/src/jbofihe/htmlout.c,v 1.2 2001/05/28 21:30:15 richard Exp $
 
   Driver for producing HTML output from the glosser.
   ***************************************/
@@ -401,9 +401,14 @@
   }
 }
 
+static void write_partial_tag_text(char *t)/*{{{*/
+{
+  printf("%s", t);
+}
+/*}}}*/
 
-/*+  +*/
-DriverVector html_driver = {
+DriverVector html_driver =/*{{{*/
+{
   initialise,
   write_prologue,
   write_epilog,
@@ -415,5 +420,6 @@
   start_tags,
   end_tags,
   start_tag,
-  write_tag_text
-};
+  write_tag_text,
+  write_partial_tag_text
+};/*}}}*/
diff -urN jbofihe-0.36/jbofihe.1 jbofihe-0.37/jbofihe.1
--- jbofihe-0.36/jbofihe.1	Mon Mar 26 22:03:05 2001
+++ jbofihe-0.37/jbofihe.1	Wed Aug  8 22:41:47 2001
@@ -3,7 +3,7 @@
 jbofihe \- Lojban parser / grammar checker / word glosser
 .SH SYNOPSIS
 .B jbofihe
-[-v] [ -h | --help | -? ] [ -k ] [ -g ] [ -t ] [-tf ] [ -l ] [ -b ] [ -x ] [ -H ] [ -m ] [ -w width ] [-ie] [-re] [-se] [-sev] [-cr] [-bt] [filename]
+[-v] [ -h | --help | -? ] [ -k ] [ -t ] [-tf ] [ -l ] [ -b ] [ -x ] [ -H ] [ -m ] [ -w width ] [-ie] [-re] [-se] [-sev] [-cr] [-bt] [filename]
 .SH DESCRIPTION
 .I jbofihe
 is a program that reads a piece of Lojban text and checks it for
@@ -28,9 +28,6 @@
 .B -k
 Display lists of tokens after parse and after pseudo-token insertion
 (mainly useful for debugging the lexer and parser).
-.TP
-.B -g
-Produce a rough English gloss (superseded)
 .TP
 .B -t
 Produce a syntax tree on stdout.  Nodes with only one child will be
diff -urN jbofihe-0.36/jbofihe.lsm jbofihe-0.37/jbofihe.lsm
--- jbofihe-0.36/jbofihe.lsm	Mon Mar 26 22:03:05 2001
+++ jbofihe-0.37/jbofihe.lsm	Thu Jan  1 01:00:00 1970
@@ -1,16 +0,0 @@
-Begin3
-Title:          jbofihe
-Version:        0.36
-Entered-date:   26MAR01
-Description:    A parser for Lojban, a constructed human language
-                with a machine-parseable grammar (see www.lojban.org).
-                Also outputs rough English translations.  Several
-                related utilities bundled.
-Keywords:       lojban, constructed language
-Author:         rpc@myself.com (Richard Curnow)
-Maintained-by:  rpc@myself.com (Richard Curnow)
-Primary-site:   sunsite.unc.edu /pub/Linux/apps/misc
-                316k jbofihe-0.36.tar.gz
-Platforms:      Unix, MSDOS
-Copying-policy: GPL
-End
diff -urN jbofihe-0.36/jvocuhadju.1 jbofihe-0.37/jvocuhadju.1
--- jbofihe-0.36/jvocuhadju.1	Mon Mar 26 22:03:06 2001
+++ jbofihe-0.37/jvocuhadju.1	Wed Aug  8 22:41:47 2001
@@ -4,9 +4,7 @@
 .SH SYNOPSIS
 .PP
 .B jvocuhadju
-[-v]
-.PP
-.B jvocuhadju
+[-v] [-a] [-l]
 .I tanru-component-1
 [
 .BR "" ...
@@ -20,10 +18,21 @@
 displayed ranked in order of desirability, with their scores shown
 based on the algorithm in the reference grammar.  The lowest score is
 best.
+.PP
+Where a tanru-component is a cmavo containing an apostrophe, the apostrophe may
+be replaced by the letter "h" on the command line.  This is a convenience to
+avoid having to backslash-escape the apostrophe to hide it from the shell.
 .SH OPTIONS
 .TP
 .B -v
 Show the program version and exit.
+.TP
+.B -a
+List all possible lujvo for the input tanru (default is just the best 8).
+.TP
+.B -l
+Allow 4/5 letter rafsi to be picked even if a 3 letter form is available for
+any particular tanru component.
 .TP
 .B tanru-component-k
 This is a gismu or cmavo which is one component of the word.  Cmavo
diff -urN jbofihe-0.36/jvocuhadju.c jbofihe-0.37/jvocuhadju.c
--- jbofihe-0.36/jvocuhadju.c	Mon Mar 26 22:03:06 2001
+++ jbofihe-0.37/jvocuhadju.c	Wed Aug  8 22:41:47 2001
@@ -1,5 +1,5 @@
 /***************************************
-  $Header: /cvs/src/jbofihe/jvocuhadju.c,v 1.5 2000/11/28 21:39:51 richard Exp $
+  $Header: /cvs/src/jbofihe/jvocuhadju.c,v 1.7 2001/07/08 21:48:37 richard Exp $
 
   Program to generate the lujvo form of a given tanru
   ***************************************/
@@ -31,6 +31,8 @@
 #include "lujvofns.h"
 #include "version.h"
 
+static int uselong = 0; /* Consider lujvo including long rafsi when short ones are available */
+static int showall = 0; /* List all lujvo, not just the best MAXLUJVO of them */
 
 static int ends_in_vowel(char *s) {
   char *p;
@@ -90,7 +92,7 @@
   char e1, f2;
   char *p;
   char trial[3];
-  int test1, test2, test3;
+  int test1;
   f2 = s2[0];
   p = s1;
   while (*p) p++;
@@ -714,12 +716,11 @@
   int i, j, k, n;
   int last;
   int index, si;
-  int uselong;
-  char rr[5];
   int c[MAXT]; /* Counters over the rafsi forms for each argument
                   (implements an arbitrarily-nested for loop) */
 
-  uselong = 0;
+  int check1, check2, check3, check4;
+
   i=0;
   while (*tanru) {
     strcpy(t[i], *tanru);
@@ -748,8 +749,8 @@
           j++;
         }
       }
-      if (uselong || j==0) {
-        strcpy(r[i][0], t[i]);
+      if ((uselong || j==0) && (strlen(t[i]) == 5)) {
+        strcpy(r[i][j], t[i]);
         j++;
       }
       nr[i] = j;
@@ -761,9 +762,9 @@
         j++;
       }
       if ((uselong || j==0) && (strlen(t[i]) == 5)) {
-        strcpy(r[i][0], t[i]);
-        chop_last_char(r[i][0]);
-        j ++;
+        strcpy(r[i][j], t[i]);
+        chop_last_char(r[i][j]);
+        j++;
       }
       nr[i] = j;
     }
@@ -797,7 +798,7 @@
     }
 
     /* Work out glue */
-    if ((nt > 2) && is_cvv(r[0][c[0]])) {
+    if ((nt > 2) && (is_cvv(r[0][c[0]]) || is_cvav(r[0][c[0]]))) {
       /* Require r or n hyphen to stop initial cmavo falling off */
       if (r[1][c[1]][0] == 'r') {
         g[0] = 'n';
@@ -805,12 +806,11 @@
         g[0] = 'r';
       }
     } else {
-#if 0
-      if (!strcmp(r[0][c[0]], "lo'i") && !strcmp(r[1][c[1]], "lei")) {
-        printf("nt=%d test1=%d test2=%d\n", nt, is_cvv(r[0][c[0]]), is_ccv(r[1][c[1]]));
-      }
-#endif
-      if ((nt == 2) && is_cvv(r[0][c[0]]) && !is_ccv(r[1][c[1]])) {
+      check1 = (nt == 2);
+      check2 = is_cvv(r[0][c[0]]) || is_cvav(r[0][c[0]]);
+      check3 = is_ccv(r[1][c[1]]);
+      check4 = is_ccvcv(r[1][c[1]]);
+      if (check1 && check2 && (!check3 || check4)) {
         if (r[1][c[1]][0] == 'r') {
           g[0] = 'n';
         } else {
@@ -969,7 +969,7 @@
 
   qsort(lujvo, nl, sizeof(Lujvo), compare_lujvo);
 
-  if (nl>MAXLUJVO) nl = MAXLUJVO;
+  if (!showall && (nl>MAXLUJVO)) nl = MAXLUJVO;
   for (i=0; i<nl; i++) {
     printf("%6d %s\n", lujvo[i].score, lujvo[i].word);
   }
@@ -977,10 +977,26 @@
 }
 
 int main (int argc, char **argv) {
-  if (argc > 1 && !strcmp(argv[1], "-v")) {
-    fprintf(stderr, "jvocuhadju version %s\n", version_string);
-    exit(0);
+  char *words[MAXT]; /* tanru components, NULL terminated */
+  char **wp = words;
+  while (++argv, --argc > 0) {
+    if (!strcmp(*argv, "-v")) {
+      fprintf(stderr, "jvocuhadju version %s\n", version_string);
+      exit(0);
+    } else if (!strcmp(*argv, "-a")) {
+      showall = 1;
+    } else if (!strcmp(*argv, "-l")) {
+      uselong = 1;
+    } else if ((*argv)[0] == '-') {
+      fprintf(stderr, "Unrecognised command line option %s\n", *argv);
+      exit(1);
+    } else if (wp - words >= MAXT - 1) { /* leave a slot for the NULL */
+      fprintf(stderr, "Too many tanru components\n"); exit(1);
+    } else {
+      *wp++ = *argv;
+    }
   }
-  makelujvo(argv+1);
+  *wp = NULL;
+  makelujvo(words);
   return 0;
 }
diff -urN jbofihe-0.36/latex.c jbofihe-0.37/latex.c
--- jbofihe-0.36/latex.c	Mon Mar 26 22:03:06 2001
+++ jbofihe-0.37/latex.c	Wed Aug  8 22:41:47 2001
@@ -1,5 +1,5 @@
 /***************************************
-  $Header: /cvs/src/jbofihe/latex.c,v 1.4 1999/08/05 22:13:15 richard Exp $
+  $Header: /cvs/src/jbofihe/latex.c,v 1.5 2001/05/28 21:30:15 richard Exp $
 
   Driver for producing LaTeX output from the glosser.
   ***************************************/
@@ -425,19 +425,7 @@
   first_tag = 0;
 }
 
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  char *brivla
-
-  char *place
-
-  char *trans
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-write_tag_text(char *brivla, char *place, char *trans, int brac)
+static void write_tag_text(char *brivla, char *place, char *trans, int brac)/*{{{*/
 {
   if (brac) {
     printf("\\textsl{\\footnotesize{}%s%s (%s)}\n", brivla, place, make_texsafe(trans));
@@ -445,10 +433,15 @@
     printf("\\textsl{\\footnotesize{}%s%s %s}\n", brivla, place, make_texsafe(trans));
   }
 }
+/*}}}*/
+static void write_partial_tag_text(char *t)/*{{{*/
+{
+  printf("\\textsl{\\footnotesize{}%s}\n", t);
+}
+/*}}}*/
 
-
-/*+  +*/
-DriverVector latex_driver = {
+DriverVector latex_driver =/*{{{*/
+{
   initialise,
   write_prologue,
   latex_write_epilog,
@@ -461,4 +454,5 @@
   end_tags,
   start_tag,
   write_tag_text,
-};
+  write_partial_tag_text
+};/*}}}*/
diff -urN jbofihe-0.36/latexblk.c jbofihe-0.37/latexblk.c
--- jbofihe-0.36/latexblk.c	Mon Mar 26 22:03:06 2001
+++ jbofihe-0.37/latexblk.c	Wed Aug  8 22:41:47 2001
@@ -1,5 +1,5 @@
 /***************************************
-  $Header: /cvs/src/jbofihe/latexblk.c,v 1.6 1999/10/06 21:13:24 richard Exp $
+  $Header: /cvs/src/jbofihe/latexblk.c,v 1.7 2001/05/28 21:30:15 richard Exp $
 
   Driver for producing LaTeX output, using blocks rather than
   free-flow.
@@ -440,9 +440,20 @@
   strcat(tag_text, buffer);
 }
 
+static void write_partial_tag_text(char *t)/*{{{*/
+{
+  /* Append t to tag_text directly: staging it in a fixed buffer could overflow. */
 
-/*+  +*/
-DriverVector latex_block_driver = {
+  /* Each entry after the first starts on a new line. */
+  if (tag_text[1]) { /* '[' is pre-inserted by start_tags */
+    strcat(tag_text, "\n");
+  }
+  strcat(tag_text, t);
+}
+/*}}}*/
+
+DriverVector latex_block_driver =/*{{{*/
+{
   initialise,
   write_prologue,
   latex_write_epilog,
@@ -455,4 +466,5 @@
   end_tags,
   start_tag,
   write_tag_text,
-};
+  write_partial_tag_text
+};/*}}}*/
diff -urN jbofihe-0.36/lex1.c jbofihe-0.37/lex1.c
--- jbofihe-0.36/lex1.c	Mon Mar 26 22:03:06 2001
+++ jbofihe-0.37/lex1.c	Wed Aug  8 22:41:48 2001
@@ -1,5 +1,5 @@
 /***************************************
-  $Header: /cvs/src/jbofihe/lex1.c,v 1.19 2001/03/26 20:07:52 richard Exp $
+  $Header: /cvs/src/jbofihe/lex1.c,v 1.23 2001/07/24 21:21:28 richard Exp $
 
   Read the input file and perform low-level lexing functions.
   ***************************************/
@@ -32,6 +32,7 @@
 #include "functions.h"
 #include "lujvofns.h"
 #include "morf.h"
+#include "bccheck.h"
 
 static char zoi_form[8]; /* la'o or zoi */
 static int  zoi_delim_next;
@@ -194,52 +195,31 @@
   ++++++++++++++++++++++++++++++++++++++*/
 
 static void
-process_cmene(char *buf, int start_line, int start_column)
+process_cmene(char *buf, int start_line, int start_column,
+              int is_bad, int can_split, char *ladoi, char *tail)
 {
   TreeNode *tok;
-  char *p;
 
-  /* Check for invalid construct within name. */
-  p = buf;
-  while (*p) {
-    if ((!strncmp(p, "la",  2) && is_consonant(p[2])) ||
-        (!strncmp(p, "lai", 3) && is_consonant(p[3])) ||
-        (!strncmp(p, "doi", 3) && is_consonant(p[3]))) {
-      if ((p == buf) || is_vowel(*(p-1))) {
-        char temp[128], *q;
-        int len;
-
-        /* Deal with what comes before la, lai, doi */
-        len = p - buf;
-        strncpy(temp, buf, len);
-        if (len > 0) {
-          temp[len] = 0;
-          process_word(temp, start_line, start_column);
-        }
-        
-        /* Deal with la, lai, doi */
-        temp[0] = *p++;
-        q = temp + 1;
-        while (is_vowel(*p)) {
-          *q++ = *p++;
-        }
-        *q = 0;
-        process_word(temp, start_line, start_column+len);
-
-        /* Deal with the tail */
-        process_cmene(p, start_line, start_column + (p-buf));
-        return;
-      }
-    }
-    p++;
-  }
+  if (is_bad)  {
+    char prefix[1024], labuf[8];
+    char *p, *q;
+    int len1, len2;
+    for (p=buf, q=prefix, len1=0; p!=ladoi; len1++) *q++ = *p++;
+    *q = 0;
+    for (p=ladoi, q=labuf, len2=0; p!=tail; len2++) *q++ = *p++;
+    *q = 0;
 
-  tok = new_node();
-  tok->start_line = start_line;
-  tok->start_column = start_column;
-  tok->type = N_CMENE;
-  tok->data.cmene.word = new_string(buf);
-  add_token(tok);
+    if (prefix[0]) process_word(prefix, start_line, start_column);
+    process_word(labuf, start_line, start_column+len1);
+    process_word(tail, start_line, start_column+len1+len2);
+  } else {
+    tok = new_node();
+    tok->start_line = start_line;
+    tok->start_column = start_column;
+    tok->type = N_CMENE;
+    tok->data.cmene.word = new_string(buf);
+    add_token(tok);
+  }
 }
 
 /*++++++++++++++++++++++++++++++++++++++
@@ -282,6 +262,7 @@
   char *word_starts[1024];
   char **pws, **pwe;
   MorfType morf_type;
+  struct morf_xtra mx;
   int column, incr;
 
   if (zoi_data) {
@@ -331,7 +312,9 @@
   /* Analyse word type */
   
   pws = pwe = word_starts;
-  morf_type = morf_scan(buf, &pwe);
+  /* FIXME: Need to get morf_xtra info back here, to help with splitting bad
+   * cmene */
+  morf_type = morf_scan(buf, &pwe, &mx);
   column = start_column;
   switch (morf_type) {
     case MT_BOGUS:
@@ -354,6 +337,8 @@
       break;
     case MT_FUIVLA3:
     case MT_FUIVLA3_CVC:
+    case MT_FUIVLA3X:
+    case MT_FUIVLA3X_CVC:
       add_preceding_cmavo(pws, pwe, start_line, &column);
       add_brivla_token(*pwe, start_line, column, BVT_FUIVLA3);
       break;
@@ -362,7 +347,11 @@
       add_brivla_token(*pwe, start_line, column, BVT_FUIVLA4);
       break;
     case MT_CMENE:
-      process_cmene(buf, start_line, column);
+      process_cmene(buf, start_line, column,
+                    mx.u.cmene.is_bad,
+                    mx.u.cmene.can_split,
+                    mx.u.cmene.ladoi,
+                    mx.u.cmene.tail);
       break;
     case MT_CMAVOS:
       {
diff -urN jbofihe-0.36/lujvofns.c jbofihe-0.37/lujvofns.c
--- jbofihe-0.36/lujvofns.c	Mon Mar 26 22:03:06 2001
+++ jbofihe-0.37/lujvofns.c	Wed Aug  8 22:41:48 2001
@@ -1,5 +1,5 @@
 /***************************************
-  $Header: /cvs/src/jbofihe/lujvofns.c,v 1.3 2000/11/30 23:58:29 richard Exp $
+  $Header: /cvs/src/jbofihe/lujvofns.c,v 1.5 2001/07/09 22:07:45 richard Exp $
 
   Functions to do with manipulating lujvo etc.
   ***************************************/
@@ -36,7 +36,7 @@
 #define UNVOICED  0100
 #define UPPERCASE 0200
 
-unsigned char attr_table[256] = {
+unsigned char attr_table[256] = {/*{{{*/
   0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, /* 000 - 007 */
   0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, /* 010 - 017 */
   0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, /* 020 - 027 */
@@ -72,98 +72,36 @@
   0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, /* 350 - 357 */
   0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000, /* 360 - 367 */
   0000, 0000, 0000, 0000, 0000, 0000, 0000, 0000  /* 370 - 377 */
-};
+};/*}}}*/
 
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  int is_consonant
-
-  char c
-  ++++++++++++++++++++++++++++++++++++++*/
-
-int
-is_consonant(char c) {
-  return !!(attr_table[c] & CONSONANT);
+int is_consonant(char c) {/*{{{*/
+  return !!(attr_table[(unsigned)c & 0xff] & CONSONANT);
 }
-
-/*++++++++++++++++++++++++++++++++++++++
-  Check whether a character is a consonant.
-
-  int is_consonant
-
-  char c
-  ++++++++++++++++++++++++++++++++++++++*/
-
-int
-is_uppercase_consonant(char c) {
+/*}}}*/
+int is_uppercase_consonant(char c) {/*{{{*/
   int m = CONSONANT | UPPERCASE;
-  return ((attr_table[c] & m) == m);
-}
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  int is_vowel
-
-  char c
-  ++++++++++++++++++++++++++++++++++++++*/
-
-int
-is_vowel(char c) {
-  return !!(attr_table[c] & VOWEL);
-}
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  int is_voiced
-
-  char c
-  ++++++++++++++++++++++++++++++++++++++*/
-
-int
-is_voiced(char c) {
-  return !!(attr_table[c] & VOICED);
-}
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  int is_unvoiced
-
-  char c
-  ++++++++++++++++++++++++++++++++++++++*/
-
-int
-is_unvoiced(char c) {
-  return !!(attr_table[c] & UNVOICED);
-}
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  int is_sibilant
-
-  char c
-  ++++++++++++++++++++++++++++++++++++++*/
-
-int
-is_sibilant(char c) {
-  return !!(attr_table[c] & SIBILANT);
+  return ((attr_table[(unsigned)c & 0xff] & m) == m);
 }
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  int is_ccv
-
-  char *s
-  ++++++++++++++++++++++++++++++++++++++*/
-
-int
-is_ccv(char *s) {
-  if (is_consonant(s[0]) &&
+/*}}}*/
+int is_vowel(char c) {/*{{{*/
+  return !!(attr_table[(unsigned)c & 0xff] & VOWEL);
+}
+/*}}}*/
+int is_voiced(char c) {/*{{{*/
+  return !!(attr_table[(unsigned)c & 0xff] & VOICED);
+}
+/*}}}*/
+int is_unvoiced(char c) {/*{{{*/
+  return !!(attr_table[(unsigned)c & 0xff] & UNVOICED);
+}
+/*}}}*/
+int is_sibilant(char c) {/*{{{*/
+  return !!(attr_table[(unsigned)c & 0xff] & SIBILANT);
+}
+/*}}}*/
+int is_ccv(char *s) {/*{{{*/
+  if ((strlen(s) >= 3) &&
+      is_consonant(s[0]) &&
       is_consonant(s[1]) &&
       is_vowel(s[2])) {
     return 1;
@@ -171,40 +109,35 @@
     return 0;
   }
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  int is_cvv
-
-  char *s
-  ++++++++++++++++++++++++++++++++++++++*/
-
-int
-is_cvv(char *s) {
-  if (is_consonant(s[0]) &&
+/*}}}*/
+int is_cvv(char *s) {/*{{{*/
+  int len = strlen(s);
+  if ((len >= 3) && 
+      is_consonant(s[0]) &&
       is_vowel(s[1]) &&
-      (is_vowel(s[2]) ||
-       (is_vowel(s[3]) && s[2] == '\''))) {
+      is_vowel(s[2])) {
     return 1;
   } else {
     return 0;
   }
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  int is_cvc
-
-  char *s
-  ++++++++++++++++++++++++++++++++++++++*/
-
-int
-is_cvc(char *s) {
-  if (is_consonant(s[0]) &&
+/*}}}*/
+int is_cvav(char *s) {/*{{{*/
+  int len = strlen(s);
+  if ((len >= 4) && 
+      is_consonant(s[0]) &&
+      is_vowel(s[1]) &&
+      (s[2] == '\'') &&
+      is_vowel(s[3])) {
+    return 1;
+  } else {
+    return 0;
+  }
+}
+/*}}}*/
+int is_cvc(char *s) {/*{{{*/
+  if ((strlen(s) >= 3) &&
+      is_consonant(s[0]) &&
       is_vowel(s[1]) &&
       is_consonant(s[2])) {
     return 1;
@@ -212,19 +145,10 @@
     return 0;
   }
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  int is_cvccv
-
-  char *s
-  ++++++++++++++++++++++++++++++++++++++*/
-
-int
-is_cvccv(char *s) {
-  if (is_consonant(s[0]) &&
+/*}}}*/
+int is_cvccv(char *s) {/*{{{*/
+  if ((strlen(s) >= 5) &&
+      is_consonant(s[0]) &&
       is_vowel(s[1]) &&
       is_consonant(s[2]) &&
       is_consonant(s[3]) &&
@@ -233,20 +157,22 @@
   } else {
     return 0;
   }
-}
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  int is_ccvcv
-
-  char *s
-  ++++++++++++++++++++++++++++++++++++++*/
-
-int
-is_ccvcv(char *s) {
-  if (is_consonant(s[0]) &&
+}/*}}}*/
+int is_cvccy(char *s) {/*{{{*/
+  if ((strlen(s) >= 5) &&
+      is_consonant(s[0]) &&
+      is_vowel(s[1]) &&
+      is_consonant(s[2]) &&
+      is_consonant(s[3]) &&
+      (s[4] == 'y')) {
+    return 1;
+  } else {
+    return 0;
+  }
+}/*}}}*/
+int is_ccvcv(char *s) {/*{{{*/
+  if ((strlen(s) >= 5) &&
+      is_consonant(s[0]) &&
       is_consonant(s[1]) &&
       is_vowel(s[2]) &&
       is_consonant(s[3]) &&
@@ -255,18 +181,21 @@
   } else {
     return 0;
   }
-}
-
-/*++++++++++++++++++++++++++++++++++++++
-  Check whether a consonant pair is permissible
-
-  int is_pairok
-
-  char *s
-  ++++++++++++++++++++++++++++++++++++++*/
-
-int
-is_pairok(char *s) {
+}/*}}}*/
+int is_ccvcy(char *s) {/*{{{*/
+  if ((strlen(s) >= 5) &&
+      is_consonant(s[0]) &&
+      is_consonant(s[1]) &&
+      is_vowel(s[2]) &&
+      is_consonant(s[3]) &&
+      (s[4] == 'y')) {
+    return 1;
+  } else {
+    return 0;
+  }
+}/*}}}*/
+int is_pairok(char *s) {/*{{{*/
+  /* Check whether a consonant pair is permissible */
   int test1, test2, test3, test4; /* all true if acceptable */
 
   test1 = (s[0] != s[1]);
@@ -279,17 +208,10 @@
   return test1 && test2 && test3 && test4;
 
 }
-
-/*++++++++++++++++++++++++++++++++++++++
-  Return 1 if an initial consonant pair is acceptable for a lujvo, otherwise return 0.
-
-  int is_initial_pair_ok
-
-  char *s The string whose first 2 chars are to be tested.
-  ++++++++++++++++++++++++++++++++++++++*/
-
-int
-is_initialpairok(char *s) {
+/*}}}*/
+int is_initialpairok(char *s) {/*{{{*/
+  /* Return 1 if an initial consonant pair is acceptable for a lujvo, otherwise
+     return 0. */
 
   switch (s[0]) {
     case 'b':
@@ -315,112 +237,166 @@
       return 0;
   }
 }
+/*}}}*/
+int is_bad_triple (char *s) {/*{{{*/
+  /* Return 1 if s starts with "ntc", "nts", "ndj" or "ndz", otherwise 0 */
+  if (s[0] != 'n') return 0;
+  if (s[1] == 't') return (s[2] == 'c') || (s[2] == 's');
+  if (s[1] == 'd') return (s[2] == 'j') || (s[2] == 'z');
+  return 0;
+}
+/*}}}*/
 
+/* ========================================*/
 
-/*++++++++++++++++++++++++++++++++++++++
-  Return 1 if the string t has the correct form to be a lujvo (note,
-  there is no dictionary lookup of the apparent constituent rafsi)
+static int debug_ivl = 0;
 
-  int is_valid_lujvo
+#define ADVANCE(n) t+=(n), len-=(n)
 
-  char *t
-  ++++++++++++++++++++++++++++++++++++++*/
+/* is_valid_lujvo(), used by the jvocu'adju program to do tosmabru checks. */
 
 int
-is_valid_lujvo(char *t) {
-  char *p;
-  int debug = 0;
-
-  if (debug) printf("Testing [%s] as valid lujvo\n", t);
-  if (strlen(t) < 5) {
-    if (debug) printf("Initial length too short, invalid\n");
-    return 0;
-  }
-  while (*t) {
-    if (debug) printf("Residual [%s]\n", t);
-    p = strchr(t, 'y');
-    if ((p-t) == 4) {
-      /* Starts with abcdy, have to check whether abcd could be a valid rafsi */
-      if (is_consonant(t[0]) && is_consonant(t[1]) && is_vowel(t[2]) && is_consonant(t[3]) && is_initialpairok(t)) {
-        if (debug) printf("4 letter rafsi CCVC at start valid then y\n");
-        t+=5;
-      } else if (is_consonant(t[0]) && is_vowel(t[1]) && is_consonant(t[2]) && is_consonant(t[3]) && is_pairok(t+2)) {
-        if (debug) printf("4 letter rafsi CVCC at start valid then y\n");
-        t+=5;
+is_valid_lujvo(char *t)
+{
+  /* Return 1 if t has the right form to be a lujvo, else 0.  Only the shape
+     is checked (rafsi patterns, consonant pairs, y and r/n hyphens); no
+     dictionary lookup of the constituent rafsi is done. */
+  int len = strlen(t);       /* characters of t still to be consumed */
+  char buf[1024];            /* scratch: remaining word with a y removed */
+
+  /* Flags to check CVV + hyphen stuff. */
+  int start_cvv = 0;         /* first rafsi matched is_cvv() or is_cvav() */
+  int had_rn_hyphen = 0;     /* an r/n hyphen followed that first rafsi */
+  int final_ccv = 0;         /* last rafsi was CCV; must start out clear */
+
+  /* Number of components seen */
+  int nrafsi = 0;
+
+
+  for (;;) {
+
+    if (debug_ivl) printf("Residual [%s]\n", t);
+    
+    if ((len == 5) && is_cvccv(t)) {/*{{{*/
+      if (nrafsi==0) return 0; /* Can't be initial */
+      if (!is_pairok(t+2)) return 0;
+      ADVANCE(5);
+      /*}}}*/
+    } else if ((len == 5) && is_ccvcv(t)) {/*{{{*/
+      if (nrafsi==0) return 0; /* Can't be initial */
+      if (!is_pairok(t)) return 0;
+      ADVANCE(5);
+      /*}}}*/
+    } else if (is_cvccy(t)) {/*{{{*/
+      if (len==5) return 0; /* Can't be final */
+      if (!is_pairok(t+2)) return 0;
+      ADVANCE(5);
+      /*}}}*/
+    } else if (is_ccvcy(t)) {/*{{{*/
+      if (len==5) return 0; /* Can't be final */
+      if (!is_initialpairok(t)) return 0;
+      ADVANCE(5);
+/*}}}*/
+    } else if (is_cvc(t)) {/*{{{*/
+      int pair_ok, bad_triple, smabru, initial, need_y;
+      if (len<6) return 0; /* Can't be final, must be at least 3 letters after
+                              now */
+      if (t[3] == 'y') {
+        /* buf gets t[2] + t[4..] + NUL = len-2 bytes; refuse over-long input */
+        if (len - 2 > (int) sizeof(buf)) return 0;
+        buf[0] = t[2];
+        strcpy(buf+1, t+4);
+        pair_ok = is_pairok(buf);
+        bad_triple = is_bad_triple(buf);
+        initial = (nrafsi == 0);
+        smabru = initial ? is_valid_lujvo(buf) : 0;
+        need_y = bad_triple || (initial && smabru);
+
+        if (debug_ivl)
+          printf("cvc+y, pok=%d bt=%d init=%d smabru=%d\n",
+                 pair_ok, bad_triple, initial, smabru);
+
+        /* Check whether the y is unnecessary */
+        if (pair_ok && !need_y) return 0;
+        ADVANCE(4);
       } else {
-        if (debug) printf("4 letter rafsi at start invalid\n");
-        return 0;
+        
+        pair_ok = is_pairok(t+2);
+        bad_triple = is_bad_triple(t+2);
+        initial = (nrafsi == 0);
+        smabru = initial ? is_valid_lujvo(t+2) : 0;
+        need_y = bad_triple || (initial && smabru);
+
+        if (debug_ivl)
+          printf("cvc, pok=%d bt=%d init=%d smabru=%d\n",
+                 pair_ok, bad_triple, initial, smabru);
+
+        /* Check whether there should be a y */
+        if (!pair_ok || need_y) return 0;
+            
+        ADVANCE(3);
       }
-    } else if ((p-t) == 3) {
-      /* Starts with 3 letter rafsi then a join */
-      if (is_ccv(t)) {
-        if (is_initialpairok(t)) {
-          /* Perhaps ought to check whether rafsi form is in dictionary? */
-          if (debug) printf("3 letter rafsi CCV at start valid then y\n");
-          return 1;
+      /*}}}*/
+    } else if (is_cvv(t)) {/*{{{*/
+      if (nrafsi == 0) {
+        if (len < 6) return 0;
+        /* Strip hyphen */
+        start_cvv = 1;
+        if (((t[3] == 'r') && (is_consonant(t[4]) && (t[4] != 'r'))) ||
+            ((t[3] == 'n') && (t[4] == 'r'))) {
+          had_rn_hyphen = 1;         
+          ADVANCE(4);
         } else {
-          if (debug) printf("3 letter rafsi CCV at start invalid then y\n");
-          return 0;
+          ADVANCE(3);
         }
-      } else if (is_cvc(t)) {
-        /* Dictionary test? */
-        if (debug) printf("3 letter rafsi CVC at start valid then y\n");
-        return 1;
-      } else if (is_cvv(t)) {
-        /* Not possible */
-        fprintf(stderr, "Can't have y after CVV form rafsi\n");
-        exit(1);
+      } else {
+        ADVANCE(3);
       }
-    } else if ((p-t) < 3) {
-      if (debug) printf("<3 letters left, invalid\n");
-      return 0;
-    } else if ((strlen(t) > 5) || (strlen(t) == 3)) {
-      /* Strip leading rafsi if valid */
-      if (is_ccv(t)) {
-        if (is_initialpairok(t)) {
-          if (debug) printf("Initial CCV, examine tail\n");
-          t+=3;
-          /* Go round loop again */
+      /*}}}*/
+    } else if (is_cvav(t)) {/*{{{*/
+      if (nrafsi == 0) {
+        if (len < 7) return 0;
+        /* Strip hyphen */
+        start_cvv = 1;
+        if (((t[4] == 'r') && (is_consonant(t[5]) && (t[5] != 'r'))) ||
+            ((t[4] == 'n') && (t[5] == 'r'))) {
+          had_rn_hyphen = 1;         
+          ADVANCE(5);
         } else {
-          if (debug) printf("Initial invalid CCV\n");
-          return 0;
-        }
-      } else if (is_cvc(t)) {
-        if (debug) printf("Initial CVC, examine tail\n");
-        t+=3;
-        /* Go round again */
-      } else if (is_cvv(t)) {
-        if (debug) printf("Initial CVV, examine tail\n");
-        if (t[2] == '\'') {
-          t+=4;
-        } else {
-          t+=3;
+          ADVANCE(4);
         }
       } else {
-        /* Not valid lujvo */
-        if (debug) printf("Invalid, prefix not any rafsi form\n");
-        return 0;
+        ADVANCE(4);
       }
-    } else if (strlen(t) == 5) {
-      /* Just a gismu left, assume OK if correct form */
-      if (is_cvccv(t) || is_ccvcv(t)) { 
-        if (debug) printf("Matches gismu form\n");
-        return 1;
-      } else {
-        if (debug) printf("Unmatched 5 character form\n");
-        return 0;
-      }
-    } else {
-      if (debug) printf("Unrecognized length\n");
+      /*}}}*/
+    } else if (is_ccv(t)) {/*{{{*/
+      if (!is_initialpairok(t)) return 0;
+ 
+      if (len == 3) final_ccv = 1;
+      ADVANCE(3);
+/*}}}*/
+    } else {/*{{{*/
+      /* Anything else is invalid */
       return 0;
+/*}}}*/
     }
+
+    nrafsi++;
+    if (!*t) break; /* At end of word */
   }
-  /* If we fall out of the loop, all rafsi have been checked off. */
-  return 1;
-}
 
+  /* Final processing to check validity */
+  if (start_cvv) {
+    if (((nrafsi == 2) && final_ccv) || had_rn_hyphen)
+      return 1;
+    else
+      return 0;
+  } else {
+    return 1;
+  }
+}
 
-#ifdef TEST
+#ifdef TEST_PRIMITIVES
 int main() {
   int c;
   for (c='a'; c<='z'; c++) {
@@ -429,5 +405,34 @@
   }
   return 0;
 }
+#endif
+
+#ifdef TEST_IS_VALID_LUJVO
+int main(int argc, char **argv) {
+  int ivl;           /* result of is_valid_lujvo() for the chosen word */
+  char *word = NULL; /* last non-switch argument seen, if any */
+  /* Scan the arguments after argv[0]; -d switches on debug_ivl tracing */
+  while (++argv, --argc) {
+    if (!strcmp(*argv, "-d")) {
+      debug_ivl = 1;
+    } else if ((*argv)[0] == '-') {
+      fprintf(stderr, "Unrecognized switch %s\n", *argv); /* warn, carry on */
+    } else {
+      word = *argv; /* a later word argument replaces an earlier one */
+    }
+  }
+  /* A word to test is mandatory */
+  if (!word) {
+    fprintf(stderr, "Need a lujvo to check as an argument\n");
+    exit(1);
+  }
+
+  ivl = is_valid_lujvo(word);
+
+  printf("%s : %s\n", word, ivl ? "is a lujvo" : "not a lujvo");
+  /* Exit status: 0 when the word is a lujvo, 1 when it is not */
+  return !ivl;
+}
+
 #endif
 
diff -urN jbofihe-0.36/lujvofns.h jbofihe-0.37/lujvofns.h
--- jbofihe-0.36/lujvofns.h	Mon Mar 26 22:03:06 2001
+++ jbofihe-0.37/lujvofns.h	Wed Aug  8 22:41:48 2001
@@ -1,5 +1,5 @@
 /***************************************
-  $Header: /cvs/src/jbofihe/lujvofns.h,v 1.1 2000/05/21 22:49:39 richard Exp $
+  $Header: /cvs/src/jbofihe/lujvofns.h,v 1.2 2001/07/08 21:49:33 richard Exp $
 
   
   ***************************************/
@@ -33,11 +33,15 @@
 int is_sibilant(char c);
 int is_ccv(char *s);
 int is_cvv(char *s);
+int is_cvav(char *s);
 int is_cvc(char *s);
 int is_cvccv(char *s);
 int is_ccvcv(char *s);
+int is_cvccy(char *s);
+int is_ccvcy(char *s);
 int is_pairok(char *s);
 int is_initialpairok(char *s);
+int is_bad_triple(char *s);
 int is_valid_lujvo(char *t);
 
 
diff -urN jbofihe-0.36/morf.c jbofihe-0.37/morf.c
--- jbofihe-0.36/morf.c	Mon Mar 26 22:03:06 2001
+++ jbofihe-0.37/morf.c	Wed Aug  8 22:41:48 2001
@@ -1,5 +1,5 @@
 /***************************************
-  $Header: /cvs/src/jbofihe/morf.c,v 1.19 2001/02/28 23:32:11 richard Exp $
+  $Header: /cvs/src/jbofihe/morf.c,v 1.29 2001/07/29 21:56:35 richard Exp $
 
   Carry out the morphology functions (hence the name) - take consecutive
   strings of non-whitespace from the input stream and determine what kind of
@@ -35,10 +35,15 @@
 #ifdef TEST_MORF
 #include <stdio.h>
 #include <string.h>
+#include <assert.h>
+#include "canonluj.h"
 static int verbose=0;
-static int allow_cultural_rafsi = 0;
+static int expand_lujvo = 0; /* Show decomposition of lujvo */
+static int allow_cultural_rafsi = 1; /* In testbench mode, always allow */
 #else
 
+#include <stddef.h>
+
 /* When linked into the main program, get this option from the command line
    in main.c */
 extern int allow_cultural_rafsi;
@@ -46,37 +51,24 @@
 #endif
 
 #include "morf.h"
+#include "morf_dfa.h"
+#include "bccheck.h"
 
-#define R_UNKNOWN       0
-#define R_CMAVOS        1
-#define R_CMAVOS_END_CY 2
-#define R_GISMU_0       3
-#define R_GISMU_1       4
-#define R_LUJVO_0       5
-#define R_LUJVO_1       6
-#define R_STAGE3_0      7
-#define R_STAGE3_1      8
-#define R_STAGE3_1_CVC  9
-#define R_STAGE4_0     10
-#define R_STAGE4_1     11
-#define R_CMENE        12
-#define R_BAD_TOSMABRU 13
-#define R_BAD_SLINKUI  14
-
-/* Define the values returned by priority coding the bit patterns */
-#define W_UNKNOWN         0
-#define W_CMAVOS          1
-#define W_CMAVOS_END_CY   2
-#define W_GISMU           3
-#define W_LUJVO           4
-#define W_FUIVLA3         5
-#define W_FUIVLA3_CVC     6
-#define W_FUIVLA4         7
-#define W_CMENE           8
-#define W_BAD_TOSMABRU    9
-#define W_BAD_SLINKUI    10
-#define W_BIZARRE        11
-
+enum processed_category {/*{{{*/ /* word type morf_scan derives from the DFA's raw exit value */
+  W_UNKNOWN, /* nothing matched, or the word started/ended with a comma */
+  W_CMAVOS, W_CMAVOS_END_CY,
+  W_GISMU,
+  W_LUJVO,
+  W_CULTURAL_LUJVO, /* lujvo using cultural rafsi; only accepted if allow_cultural_rafsi */
+  W_FUIVLA3, W_FUIVLA3_CVC, /* stage-3 fu'ivla; _CVC = short rafsi before the hyphen */
+  W_FUIVLA3X, W_FUIVLA3X_CVC, /* "multi-stage-3"; _CVC = final rafsi before hyphen is short */
+  W_FUIVLA4,
+  W_CMENE,
+  W_BAD_TOSMABRU, W_CULTURAL_BAD_TOSMABRU, /* lujvo with a y hyphen that is not required */
+  W_BAD_SLINKUI, /* fu'ivla that fails the slinku'i test */
+  W_BIZARRE /* NOTE(review): not produced by the raw->processed switch shown here - confirm use */
+};
+/*}}}*/
 /* Include table for turning the letter stream into meta-classes (consonant,
  * vowel, permissible pair etc).  These 'meta-classes' are the tokens used by
  * the DFA.  These tables are built my mk_fetab.pl */
@@ -85,59 +77,56 @@
 /* Include file for checking vowel pairs/clusters within the input stream. */
 #include "morfvlex.c"
 
-/* Include the main DFA scanning tables (including cultural rafsi) built by
-   nfa2dfa.pl. */
-#include "morf_dfa.c"
-
-/* Include the main DFA scanning tables (excluding cultural rafsi) built by
-   nfa2dfa.pl. */
-#include "morfnc_dfa.c"
-
+static unsigned char s2l[32] = /*{{{*/
 /* Map N->1, R->2, other C->3, else ->0.  Used to trim down the last-but-one
  * letter, which is saved to allow the front-end to spot illegal triples and
  * type III fu'ivla hyphen patterns. */
-
-static unsigned char s2l[32] = {
+{
   0, 0, 3, 3, 3, 0, 3, 3,
   0, 0, 3, 3, 3, 3, 1, 0,
   3, 0, 2, 3, 3, 0, 3, 0,
   3, 0, 3, 0, 0, 0, 0, 0
 };
+/*}}}*/
 
 #if defined(TEST_MORF)
+static char *toknam[] =/*{{{*/
 /* Token names for -v mode */
-static char *toknam[] = {
+{
   "UNK", "V", "APOS", "Y", "R", "N", "C",
   "NR", "CI", "CSI", "CP", "CS", "CN",
   "H", "HS", "BT", "VV", "VX", "VY", "YY",
   "Y,Y"
 };
-
+/*}}}*/
+static char *actnam[] =/*{{{*/
 /* Front end state machine actions, printable for -v mode */
-static char *actnam[] = {
+{
   "CLR", "SFT", "FRZ"
 };
-
-static char charnames[32] = {
+/*}}}*/
+static char charnames[32] = {/*{{{*/
   '?', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
   '?', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
   'p', '?', 'r', 's', 't', 'u', 'v', 'w',
   'x', 'y', 'z', '?', '?', '?', '\'', '?'
 };
-
-static char vowelnames[8] = {
+/*}}}*/
+static char vowelnames[8] = {/*{{{*/
   ',', 'C', 'y', 'a', 'e', 'i', 'o', 'u'
 };
-
-static char Lname[4] = { 'V', 'n', 'r', 'C' };
-
+/*}}}*/
+static char Lname[4] = {/*{{{*/
+  'V', 'n', 'r', 'C'
+};
+/*}}}*/
 #endif
 
+static unsigned char mapchar[256] =/*{{{*/
 /* Map the ASCII set to the range 0..31 (mostly
    by masking high order bits off the letters,
    except the apostrophe is given the value 30) */
-
-static unsigned char mapchar[256] = {
+{
   0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
   0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
   0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
@@ -178,8 +167,8 @@
   0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
   0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f
 };
-
-static unsigned char vmapchar[256] = {
+/*}}}*/
+static unsigned char vmapchar[256] = {/*{{{*/
   0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* invalid -> consonant code */
   0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* (reject by main FSM) */
   0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
@@ -220,57 +209,15 @@
   0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
   0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01
 };
-
-
-struct StateTransTable {
-  short *ns;
-  unsigned char *tok;
-  unsigned short *base;
-  short *def;
-};
-
-static struct StateTransTable morf_stt = {morf_nextstate, morf_token, morf_base, morf_defstate};
-static struct StateTransTable morfnc_stt = {morfnc_nextstate, morfnc_token, morfnc_base, morfnc_defstate};
+/*}}}*/
 
 /*********************************************************************/
-/* Given the current state and the 'token' read, find the next state */
-/*********************************************************************/
-
-int find_next_state(struct StateTransTable *tab, int cs, unsigned char k)
-{
-  int h, l, m;
-  unsigned char xm;
-  unsigned char *t = tab->tok;
-  unsigned short *nstab = tab->ns;
-  unsigned int kk = k;
-
-  while (cs >= 0) {
-    l = tab->base[cs];
-    h = tab->base[cs+1];
-    while (h > l) {
-      m = (h + l) >> 1;
-      xm = t[m];
-      if (xm == kk) goto done;
-      if (m == l) break;
-      if (xm>kk) h = m;
-      else       l = m;
-    }
-    cs = tab->def[cs]; /* Move onto next subtable to check. */
-  }
-
-  /* Tried all tables without a match, fail */
-  return -1;
-
-done:
-  return (int)nstab[m];
-}
 
+MorfType morf_scan(char *s, char ***buf_end, struct morf_xtra *arg_xtra)/*{{{*/
 /* The main scanning routine.  's' is the string to be scanned.  buf_end is a
- * pointer to a table of pointers to characters (i.e. pass by reference so we
- * can pass a result back.)  This table is filled in with the positions in 's'
- * where the prefix cmavo start. */
-MorfType
-morf_scan(char *s, char ***buf_end)
+   pointer to a table of pointers to characters (i.e. pass by reference so we
+   can pass a result back.)  This table is filled in with the positions in 's'
+   where the prefix cmavo start. */
 {
   unsigned int L, S, G;
   unsigned int vsm = 0111; /* 3 consonants as starting state */
@@ -280,16 +227,32 @@
   int ent, tent, tok, set, inhibit;
   int state, next_state;
   int inhibited = 0, initial = 1;
-  int exival;
   int decrement = 0;
-  int result;
+
+  /* Remember previous non-comma positions */
+  char *p_1 = NULL, *p_2 = NULL;
+  
+  /* Remember position for stage-3 hyphen after CVC or long rafsi prefix */
+  char *hyph3 = NULL;
+  char *hyph4 = NULL;
+  
+  enum raw_category exival;
+  enum processed_category result;
   int had_uppercase=0;
   int letter_uppercase;
   int ended_with_comma=0;
   int started_with_comma=0;
   MorfType ext_result;
-  short *exitval_table;
-  struct StateTransTable *stt;
+
+  /* Gather info in a local copy, in case client doesn't want it.
+   * (We have to gather it anyway, to support the outputs in test
+   * mode.). */
+
+  struct morf_xtra xtra;
+
+#ifdef TEST_MORF
+  int split_cmene = 0;
+#endif
 
   typedef enum {
     ACT_CLEAR=0, ACT_SHIFT=1, ACT_FREEZE=2
@@ -297,8 +260,6 @@
 
   Action act, last_act;
   
-  stt = allow_cultural_rafsi ? &morf_stt : &morfnc_stt;
-  
   start = *buf_end;
   pstart = start;
 
@@ -317,6 +278,7 @@
   p = s;
   started_with_comma = (*p == ',');
 
+  /*{{{  Main per-character loop */
   while (*p) {
     c = *p;
 
@@ -379,10 +341,28 @@
       *pcstart++ = p;
     }
 
+    /*{{{  Run attribute code*/
+    switch (morf_attribute[state]) {
+      case AT_UNKNOWN:
+        break;
+      case AT_S3_3:
+      case AT_XS3_3:
+        hyph3 = p_2;
+        break;
+      case AT_S3_4:
+      case AT_XS3_4:
+        hyph4 = p_2;
+        break;
+    }
+    /*}}}*/
+
+    p_2 = p_1;
+    p_1 = p;
     p++;
     initial = 0;
 
-    next_state = find_next_state(stt, state, tok);
+    /* next_state function from file built by dfa builder */
+    next_state = morf_next_state(state, tok);
 
 #ifdef TEST_MORF
     if (verbose) {
@@ -393,6 +373,7 @@
     }
 #endif
 
+    /*{{{  Run action on main (consonant) shift reg. */
     switch (act) {
       case ACT_CLEAR:
         L = S = 0;
@@ -406,27 +387,27 @@
       default:
         abort();
     }
-    
     last_act = act;
-
+    /*}}}*/
+    
     state = next_state;
 
     if (state < 0) break; /* syntax error */
     
   }
+  /*}}}*/
 
   if (!*p && !(vsm & 0x7)) { /* last char was a comma */
     ended_with_comma = 1;
   }
   
-  exitval_table = allow_cultural_rafsi ? morf_exitval : morfnc_exitval;
-  
-  if ((state < 0) || started_with_comma || ended_with_comma || (exitval_table[state] == 0)) {
+  if ((state < 0) || started_with_comma || ended_with_comma || (morf_exitval[state] == R_UNKNOWN)) {
     result = W_UNKNOWN;
     ext_result = MT_BOGUS;
     decrement = 0;
   } else {
-    exival = exitval_table[state];
+    exival = morf_exitval[state];
+    /*{{{  Extract word-type and CV/CC start flag */
     switch (exival) {
       case R_CMAVOS: result = W_CMAVOS; decrement = 0; break;
       case R_CMAVOS_END_CY: result = W_CMAVOS_END_CY; decrement = 0; break;
@@ -434,18 +415,27 @@
       case R_GISMU_1: result = W_GISMU; decrement = 1; break;
       case R_LUJVO_0: result = W_LUJVO; decrement = 0; break;
       case R_LUJVO_1: result = W_LUJVO; decrement = 1; break;
+      case R_CULTURAL_LUJVO_0: result = W_CULTURAL_LUJVO; decrement = 0; break;
+      case R_CULTURAL_LUJVO_1: result = W_CULTURAL_LUJVO; decrement = 1; break;
       case R_STAGE3_0: result = W_FUIVLA3; decrement = 0; break;
       case R_STAGE3_1: result = W_FUIVLA3; decrement = 1; break;
       case R_STAGE3_1_CVC: result = W_FUIVLA3_CVC; decrement = 1; break;
+      case R_X_STAGE3_0: result = W_FUIVLA3X; decrement = 0; break;
+      case R_X_STAGE3_0_CVC: result = W_FUIVLA3X_CVC; decrement = 0; break;
+      case R_X_STAGE3_1: result = W_FUIVLA3X; decrement = 1; break;
+      case R_X_STAGE3_1_CVC: result = W_FUIVLA3X_CVC; decrement = 1; break;
       case R_STAGE4_0: result = W_FUIVLA4; decrement = 0; break;
       case R_STAGE4_1: result = W_FUIVLA4; decrement = 1; break;
       case R_CMENE: result = W_CMENE; decrement = 0; break;
       case R_BAD_TOSMABRU: result = W_BAD_TOSMABRU; decrement = 1; break;
+      case R_CULTURAL_BAD_TOSMABRU: result = W_CULTURAL_BAD_TOSMABRU; decrement = 1; break;
       case R_BAD_SLINKUI: result = W_BAD_SLINKUI; decrement = 0; break;
       case R_UNKNOWN:
       default:
         result = W_UNKNOWN; decrement = 0; break;
     }
+    /*}}}*/
+    /*{{{  Map to external word type / uppercase validity test */
     switch (result) {
       case W_CMAVOS_END_CY:
         /* Add start of trailing Cy cmavo to list of word start points */
@@ -469,19 +459,37 @@
         ext_result = had_uppercase ? MT_BAD_UPPERCASE : MT_LUJVO;
         if (decrement) pstart--;
         break;
+      case W_CULTURAL_LUJVO:
+        ext_result = had_uppercase        ? MT_BAD_UPPERCASE :
+                     allow_cultural_rafsi ? MT_LUJVO : MT_BOGUS ;
+        if (decrement) pstart--;
+        break;
       case W_FUIVLA3:
         ext_result = had_uppercase ? MT_BAD_UPPERCASE : MT_FUIVLA3;
         if (decrement) pstart--;
+        xtra.u.stage_3.hyph = hyph4;
         break;
       case W_FUIVLA3_CVC:
         ext_result = had_uppercase ? MT_BAD_UPPERCASE : MT_FUIVLA3_CVC;
         if (decrement) pstart--;
+        xtra.u.stage_3.hyph = hyph3;
+        break;
+      case W_FUIVLA3X:
+        ext_result = had_uppercase ? MT_BAD_UPPERCASE : MT_FUIVLA3X;
+        if (decrement) pstart--;
+        xtra.u.stage_3.hyph = hyph4;
+        break;
+      case W_FUIVLA3X_CVC:
+        ext_result = had_uppercase ? MT_BAD_UPPERCASE : MT_FUIVLA3X_CVC;
+        if (decrement) pstart--;
+        xtra.u.stage_3.hyph = hyph3;
         break;
       case W_FUIVLA4:
         ext_result = had_uppercase ? MT_BAD_UPPERCASE : MT_FUIVLA4;
         if (decrement) pstart--;
         break;
       case W_BAD_TOSMABRU:
+      case W_CULTURAL_BAD_TOSMABRU:
       case W_BAD_SLINKUI:
         /* Don't care about uppercase/lowercase status */
         ext_result = MT_BOGUS;
@@ -491,24 +499,28 @@
         /* Cmene are allowed to have uppercase letters in them. */
         ext_result = MT_CMENE;
         pstart = start+1;
+        xtra.u.cmene.is_bad = is_bad_cmene(s, &xtra.u.cmene.can_split,
+          &xtra.u.cmene.ladoi, &xtra.u.cmene.tail);
         break;
       default:
         ext_result = MT_BOGUS;
         break;
     }
+    /*}}}*/
   }
 
 #ifdef TEST_MORF
-  if ((state < 0) || (exitval_table[state] == 0)) {
-    printf("%-25s : UNMATCHED!\n", s);
+  if ((state < 0) || (morf_exitval[state] == 0)) {
+    printf("%-25s : UNMATCHED : %s\n", s, s);
   } else {
     char *a;
     char **x;
 
     printf("%-25s : ", s);
-    printf("[EV=%2d] ", exival);
 
+    if (verbose) printf("[EV=%2d] ", exival);
 
+    /*{{{  Print word type */
     switch (result) {
       case W_UNKNOWN:
         printf("Unrecognized");
@@ -525,21 +537,40 @@
       case W_LUJVO:
         printf("lujvo");
         break;
-      case W_FUIVLA3_CVC:
-        printf("fu'ivla (stage-3 short rafsi)");
+      case W_CULTURAL_LUJVO:
+        printf("lujvo (with cultural rafsi)");
         break;
       case W_FUIVLA3:
         printf("fu'ivla (stage-3)");
         break;
+      case W_FUIVLA3_CVC:
+        printf("fu'ivla (stage-3 short rafsi)");
+        break;
+      case W_FUIVLA3X:
+        printf("fu'ivla (multi-stage-3)");
+        break;
+      case W_FUIVLA3X_CVC:
+        printf("fu'ivla (multi-stage-3, final short rafsi)");
+        break;
       case W_FUIVLA4:
         printf("fu'ivla (stage-4)");
         break;
       case W_CMENE:
-        printf("cmene");
+        if (xtra.u.cmene.is_bad && xtra.u.cmene.can_split) {
+          printf("bad cmene (breaks up)");
+        } else if (xtra.u.cmene.is_bad && !xtra.u.cmene.can_split) {
+          printf("bad cmene (doesn't break up)");
+        } else if (!xtra.u.cmene.is_bad) {
+          printf("cmene");
+        }
+        split_cmene = xtra.u.cmene.is_bad && xtra.u.cmene.can_split;
         break;
       case W_BAD_TOSMABRU:
         printf("Bad lujvo (y hyphen not required)");
         break;
+      case W_CULTURAL_BAD_TOSMABRU:
+        printf("Bad lujvo (y hyphen not required, with cultural rafsi)");
+        break;
       case W_BAD_SLINKUI:
         printf("Bad fu'ivla (fails slinku'i test)");
         break;
@@ -547,55 +578,145 @@
         printf("Internal program bug");
         break;
     }
-
+    /*}}}*/
+    /*{{{  Print invalid uppercase msg*/
     switch (result) {
       case W_UNKNOWN:
       case W_CMAVOS:
       case W_CMAVOS_END_CY:
       case W_GISMU:
       case W_LUJVO:
+      case W_CULTURAL_LUJVO:
       case W_FUIVLA3:
       case W_FUIVLA3_CVC:
+      case W_FUIVLA3X:
+      case W_FUIVLA3X_CVC:
       case W_FUIVLA4:
       case W_BAD_TOSMABRU:
+      case W_CULTURAL_BAD_TOSMABRU:
       case W_BAD_SLINKUI:
         if (had_uppercase) {
           printf(" (contains invalid uppercase)");
         }
         break;
+      case W_CMENE:
+      case W_BIZARRE:
+        /* Nothing to do */
+        break;
     }
+    /*}}}*/
 
     putchar(' ');
     putchar(':');
     putchar(' ');
 
+    /* {{{ Assert that attributes were picked up properly */
+    switch (result) {
+      case W_FUIVLA3:
+      case W_FUIVLA3X:
+        assert(hyph4);
+        break;
+      case W_FUIVLA3_CVC:
+      case W_FUIVLA3X_CVC:
+        assert(hyph3);
+        break;
+      default:
+        break;
+    }
+    /*}}}*/
+
+    /*{{{  Print original word with prefix cmavo split off */
     for (a=s, x=start; *a; a++) {
+
+      /* Print spaces to separate prefix cmavo */
       if (x && (a == *x)) {
         x++;
         putchar(' ');
         if (x == pstart) x = NULL;
       }
+      
+      /*{{{  Insert pre-char separators */
+      switch (result) {
+        case W_FUIVLA3:
+        case W_FUIVLA3X:
+          if (a == hyph4) putchar('/');
+          break;
+        case W_FUIVLA3_CVC:
+        case W_FUIVLA3X_CVC:
+          if (a == hyph3) putchar('/');
+          break;
+        case W_CMENE:
+          if (split_cmene) {
+            /* Show divisions between parts of the split, but omit the first
+             * marker if la or doi occurs at the very start of the word. */
+            if (((a == xtra.u.cmene.ladoi) && (a != s)) ||
+                (a == xtra.u.cmene.tail)) {
+              putchar ('+');
+            }
+          }
+          break;
+        default:
+          break;
+      }
+      /*}}}*/
+
+      /* Emit actual character */
       putchar(*a);
+      
+      /*{{{  Insert post-char separators */
+      switch (result) {
+        case W_FUIVLA3:
+        case W_FUIVLA3X:
+          if (a == hyph4) putchar('/');
+          break;
+        case W_FUIVLA3_CVC:
+        case W_FUIVLA3X_CVC:
+          if (a == hyph3) putchar('/');
+          break;
+        default:
+          break;
+      }
+      /*}}}*/
     }
+    /*}}}*/
+
+    /*{{{ Show expansion of lujvo */
+    switch(result) {
+      case W_LUJVO:
+        if (expand_lujvo) {
+          char *canon;
+          canon = canon_lujvo(pstart[-1]);
+          printf(" [%s]", canon);
+        }
+        break;
+
+      default:
+        break;
+    }
+    /*}}}*/
+    
     putchar('\n');
     
   }
 #endif
 
   *buf_end = pstart - 1;
+  /* Allow arg_xtra to be NULL, as the data isn't always needed */
+  if (arg_xtra) *arg_xtra = xtra;
   return ext_result;
 }
+/*}}}*/
 
 #ifdef TEST_MORF
-int main (int argc, char **argv) {
+int main (int argc, char **argv) {/*{{{*/
   char buffer[128];
   char *start[256], **pstart;
   char *word = NULL;
   while (++argv, --argc) {
     if (!strncmp(*argv, "-v", 2)) {
       verbose = 1;
-    } else if (!strncmp(*argv, "-cr", 2)) {
-      allow_cultural_rafsi = 1;
+    } else if (!strcmp(*argv, "-el")) {
+      expand_lujvo = 1;
     } else if (!strncmp(*argv, "-", 1)) {
       fprintf(stderr, "Unrecognized command line argument '%s'\n", *argv);
     } else {
@@ -604,17 +725,17 @@
   }
   if (word) {
     pstart = start;
-    morf_scan(word, &pstart);
+    morf_scan(word, &pstart, NULL);
   } else {
     while (fgets(buffer, sizeof(buffer), stdin)) {
       buffer[strlen(buffer)-1] = 0;
       if (buffer[0] == '#') continue; /* Allow comment lines in test source file */
       pstart = start;
-      morf_scan(buffer, &pstart);
+      morf_scan(buffer, &pstart, NULL);
     }
   }
   return 0;
-}
+}/*}}}*/
 #endif
 
 
diff -urN jbofihe-0.36/morf.h jbofihe-0.37/morf.h
--- jbofihe-0.36/morf.h	Mon Mar 26 22:03:06 2001
+++ jbofihe-0.37/morf.h	Wed Aug  8 22:41:48 2001
@@ -1,10 +1,28 @@
 /***************************************
-  $Header: /cvs/src/jbofihe/morf.h,v 1.4 2001/02/28 23:30:26 richard Exp $
+  $Header: /cvs/src/jbofihe/morf.h,v 1.7 2001/06/22 22:16:51 richard Exp $
 
   Header file for morphology functions module.
   
   ***************************************/
 
+/* Copyright (C) Richard P. Curnow  2000-2001 */
+/*
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * 
+*/
+
 #ifndef MORF_H
 #define MORF_H
 
@@ -17,14 +35,33 @@
   MT_LUJVO,
   MT_FUIVLA3, /* stage 3 */
   MT_FUIVLA3_CVC, /* stage 3 starting with CVC rafsi */
+  MT_FUIVLA3X, /* stage-3-like with >1 rafsi before hyphen */
+  MT_FUIVLA3X_CVC, /* stage-3-like starting with CVC rafsi */
   MT_FUIVLA4, /* stage 4 */
   MT_CMAVOS,
   MT_CMENE,
   MT_BAD_UPPERCASE
 } MorfType;
 
+/* Structure for returning extra information about some of the
+ * word types */
+
+struct morf_xtra {
+  union { /* morf_scan fills only the member matching the word type it found */
+    struct { /* cmene: is_bad is is_bad_cmene()'s result, the rest its outputs */
+      int is_bad;    /* non-zero if the cmene was judged bad */
+      int can_split; /* with is_bad: test mode reports "breaks up" when set */
+      char *ladoi;   /* split point in the word; presumably an embedded la/doi - confirm */
+      char *tail;    /* second split point in the word */
+    } cmene;
+    struct { /* stage-3 and extended stage-3 fu'ivla */
+      char *hyph; /* position in the scanned word recorded for the stage-3 hyphen */
+    } stage_3;
+  } u;
+};
+
 /* Prototypes for fns */
-extern MorfType morf_scan(char *s, char ***buf_end);
+extern MorfType morf_scan(char *s, char ***buf_end, struct morf_xtra *);
 
 #endif /* MORF_H */
 
diff -urN jbofihe-0.36/morf_dfa.h jbofihe-0.37/morf_dfa.h
--- jbofihe-0.36/morf_dfa.h	Thu Jan  1 01:00:00 1970
+++ jbofihe-0.37/morf_dfa.h	Wed Aug  8 22:41:48 2001
@@ -0,0 +1,60 @@
+/***************************************
+  $Header: /cvs/src/jbofihe/morf_dfa.h,v 1.2 2001/07/29 21:56:35 richard Exp $
+
+  Header file for interface between morf.c and built file morf_dfa.c
+  
+  ***************************************/
+
+/* Copyright (C) Richard P. Curnow  2000-2001 */
+/*
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * 
+*/
+
+#ifndef MORF_DFA_H
+#define MORF_DFA_H
+
+enum raw_category {/*{{{*/ /* element type of morf_exitval[], indexed by DFA state */
+  R_UNKNOWN, /* morf_scan treats this exit value as "no word recognised" */
+  R_CMAVOS, R_CMAVOS_END_CY,
+  R_GISMU_0, R_GISMU_1, /* _0/_1: morf_scan sets its 'decrement' flag to 0 or 1 */
+  R_LUJVO_0, R_LUJVO_1,
+  R_CULTURAL_LUJVO_0, R_CULTURAL_LUJVO_1,
+  R_STAGE3_0, R_STAGE3_1, R_STAGE3_1_CVC,
+  R_X_STAGE3_0, R_X_STAGE3_0_CVC,
+  R_X_STAGE3_1, R_X_STAGE3_1_CVC,
+  R_STAGE4_0, R_STAGE4_1,
+  R_CMENE,
+  R_BAD_TOSMABRU, R_CULTURAL_BAD_TOSMABRU,
+  R_BAD_SLINKUI
+};
+/*}}}*/
+
+enum state_attribute {/*{{{*/ /* element type of morf_attribute[]; no trailing comma, for C90 */
+  AT_UNKNOWN, /* nothing-to-do option */
+  AT_S3_3, /* after hyphen triplet for short-rafsi stage 3 */
+  AT_S3_4, /* after hyphen triplet for long-rafsi stage 3 */
+  AT_XS3_3, /* after hyphen triplet for short-rafsi extended stage 3 */
+  AT_XS3_4 /* after hyphen triplet for long-rafsi extended stage 3 */
+};
+/*}}}*/
+
+extern enum state_attribute morf_attribute[];
+extern enum raw_category morf_exitval[];
+extern int morf_next_state(int, int);
+
+#endif /* MORF_DFA_H */
+
+
diff -urN jbofihe-0.36/morf_nfa.in jbofihe-0.37/morf_nfa.in
--- jbofihe-0.36/morf_nfa.in	Mon Mar 26 22:03:06 2001
+++ jbofihe-0.37/morf_nfa.in	Wed Aug  8 22:41:48 2001
@@ -12,7 +12,7 @@
 # - rigorously checks the word form for errors (bad clusters, y where not
 #   required, bad hyphenation after initial CVV rafsi, bad vowel pairing etc)
 #
-# $Header: /cvs/src/jbofihe/morf_nfa.in,v 1.17 2001/03/04 22:13:09 richard Exp $
+# $Header: /cvs/src/jbofihe/morf_nfa.in,v 1.27 2001/07/29 21:56:36 richard Exp $
 #
 #
 # Copyright (C) Richard P. Curnow  1998-2001
@@ -33,6 +33,11 @@
 #
 #
 ###########################################################
+# Stuff to pass through verbatim to C output file
+%{
+#include "morf_dfa.h"
+%}
+###########################################################
 
 # Declare all symbols in same order as lexer (lexer returns (0 .. whatever)
 # when it recognizes the corresponding token)
@@ -82,11 +87,8 @@
 #   CULTURAL' first.
 
 # Prefix applied to the tables written out for inclusion into the C program.
-# (If building the version including the cultural (section 4.16) rafsi, the
-# prefix will be morf.  If building the version without this, the prefix will
-# be morfnc
-Prefix morfnc
-Prefix morf ## CULTURAL
+
+Prefix morf
 
 Abbrev CNR = C|N|R
 Abbrev LCI = CI|CSI
@@ -94,7 +96,7 @@
 
 ###########################################################
 # Subcomponents for lujvo matching
-
+#{{{ BLOCK SYL1
 BLOCK SYL1
 
     STATE in
@@ -104,16 +106,14 @@
         CNR ; V   ; CNR                   -> ex_cvc # Starts CVC, may require tosmabru check
         CNR ; V   ; CNR ; LCP             -> ex_cvy # Ditto, starts CV
         C   ; LCI ; V   ; CNR             -> ex_y   # Requires y before next syl
-        C   ; LCI ; V          ; VV ; CNR -> ex_y                  ## CULTURAL
-        C   ; LCI ; V   ; APOS ; V  ; CNR -> ex_y                  ## CULTURAL
     
 ENDBLOCK
+#}}}
+#{{{ BLOCK AFTER1
+BLOCK AFTER1
 
-###########################################################
 # Glue coming between first syllable (i.e. rafsi) and what follows
 
-BLOCK AFTER1
-
     STATE in_nr
         R ; CP|CS -> to_lujvo1, to_after_nr_hyphen
         N ; NR -> to_lujvo1, to_after_nr_hyphen
@@ -128,17 +128,17 @@
 
     STATE in_cvy
         Y ; LCP|CN|HS|H|BT -> to_lujvo1, to_pair1
+        Y ; CP|H ; BT      -> to_in_after_cc1
 
     STATE in_cvc
         LCP         -> to_lujvo1, to_pair1
         Y ; CN      -> to_lujvo1, to_pair1
         Y ; LCI     -> to_lujvo1t, to_pair1t, to_tosmabru
-        Y ; CP|CS ; BT -> to_in_after_cc1
+        Y ; CP ; BT -> to_in_after_cc1
         
 ENDBLOCK
-
-###########################################################
-
+#}}}
+#{{{ BLOCK SYL2
 BLOCK SYL2 # Lujvo syllables (i.e. rafsi) 2 .. (N-1)
     STATE in
         V   ; VV                 -> ex_cln
@@ -147,8 +147,6 @@
         V   ; CNR                -> ex_cvc
         V   ; CNR  ; LCP         -> ex_y
         LCI ; V    ; CNR         -> ex_y
-        LCI ; V ; APOS ; V ; CNR -> ex_y  ## CULTURAL
-        LCI ; V ; VV   ;     CNR -> ex_y  ## CULTURAL
         
 
     STATE in_after_cc # get here if last syl. ends in c, which when combined
@@ -156,35 +154,31 @@
 
         V                  -> ex_cln            # from ccv form
         V ; CNR            -> ex_y              # from ccvc form
-        V ; APOS ; V ; CNR -> ex_y ## CULTURAL
-        V ; VV       ; CNR -> ex_y ## CULTURAL
     
 ENDBLOCK
-
-###########################################################
-
-# Linkage from rafsi 2->3, ..., (N-1)->N
+#}}}
+#{{{ BLOCK AFTER2
 BLOCK AFTER2
     
+# Linkage from rafsi 2->3, ..., (N-1)->N
+
     STATE in_cln
         CNR                  -> exit
         
     STATE in_cvc
         Y ; CN               -> exit
-        Y ; CP|CS ; BT       -> exit_after_cc
+        Y ; CP ; BT          -> exit_after_cc
         LCP                  -> exit
 
     STATE in_y
         Y ; LCP|CN|HS|H|BT   -> exit
-        Y ; CP|CS ; BT       -> exit_after_cc
+        Y ; CP|H ; BT        -> exit_after_cc
     
 ENDBLOCK
-
-###########################################################
-
-# Lujvo final syllable
-
+#}}}
+#{{{ BLOCK SYLN
 BLOCK SYLN
+# Lujvo final syllable
     STATE in_main
         V   ; APOS ; V          -> exit  # final CV'V
         V   ; VV                -> exit  # final CVV
@@ -207,8 +201,10 @@
         V ; CNR ; V             -> exit  # final CCVCV
 
 ENDBLOCK
+#}}}
+#{{{ BLOCK SYL2N
+BLOCK SYL2N
 
-###########################################################
 # Everything from start of 2nd syllable (less initial consonant picked off in
 # AFTER1) through to end of lujvo.  This is in a block because 3 instances are
 # made ; one to recognize lujvo which start with a cluster (lujvo_0), a second
@@ -218,7 +214,6 @@
 # 'tosmabru' block scans the sequence that would have been the shorter lujvo to
 # check it's valid; if not, the 'y' was a bogus insertion.)
 
-BLOCK SYL2N
     s2 : SYL2
     a2 : AFTER2
     sn : SYLN
@@ -237,39 +232,34 @@
     STATE a2.exit_after_cc -> s2.in_after_cc, sn.in_after_cc
 
 ENDBLOCK
-
-###########################################################
+#}}}
+#{{{ BLOCK TOS_SYL1
+BLOCK TOS_SYL1
 # Match syllable 1 of what would be the shorter lujvo in a potential tosmabru
 # failure.
-
-BLOCK TOS_SYL1
     STATE in
         V                  -> ex_cln
         V ; CNR            -> ex_y
-        V ; APOS ; V ; CNR -> ex_y ## CULTURAL
-        V ; VV       ; CNR -> ex_y ## CULTURAL
 
 ENDBLOCK
-
-###########################################################
-# Glue after 1st syllable of potential shorter lujvo (binds 1st syl. to 2nd)
-
+#}}}
+#{{{ BLOCK TOS_AFTER1
 BLOCK TOS_AFTER1
+# Glue after 1st syllable of potential shorter lujvo (binds 1st syl. to 2nd)
 
     STATE in_cln
         CNR -> exit
 
     STATE in_y
-        Y ; LCP|CN|     -> exit
+        Y ; LCP|CN      -> exit
         Y ; CP|CS ; BT  -> exit_to_after_cc
 
 ENDBLOCK
-
-###########################################################
+#}}}
+#{{{ BLOCK TOSMABRU
+BLOCK TOSMABRU
 # To check tail portion of word to see if it too is of lujvo
 # form.
-
-BLOCK TOSMABRU
     s1 : TOS_SYL1
     a1 : TOS_AFTER1
     tail : SYL2N
@@ -283,12 +273,12 @@
     STATE tail.sn.exit -> exit
     
 ENDBLOCK
+#}}}
+#{{{ BLOCK LUJVO_BODY
+BLOCK LUJVO_BODY
 
-###########################################################
 # The complete NFA for matching a word of lujvo form.
 
-BLOCK LUJVO
-
     s1 : SYL1
     a1 : AFTER1
 
@@ -330,30 +320,130 @@
 
     # Bridge a1->t1t
     STATE a1.to_lujvo1t -> t1t.in
+    
+ENDBLOCK
+#}}}
+#{{{ BLOCK LUJVO
+BLOCK LUJVO
+# This block deals with recognition of 'normal' lujvo.
+
+    body : LUJVO_BODY
+
+    STATE in -> body.in
 
     # Set exit states on t0
-    STATE t0.sn.exit = TAG_LUJVO_0
+    STATE body.t0.sn.exit = TAG_LUJVO_0
     
     # Set exit states on t1
-    STATE t1.sn.exit = TAG_LUJVO_1
+    STATE body.t1.sn.exit = TAG_LUJVO_1
         
     # Set exit states on t1t
-    STATE t1t.sn.exit = TAG_LUJVO_1T
+    STATE body.t1t.sn.exit = TAG_LUJVO_1T
 
     # Set exit status when potentially shorter word is of valid lujvo form
     # (e.g. the smabru in tosmabru)
-    STATE tos.exit = TAG_LUJVO_TAIL_OK
+    STATE body.tos.exit = TAG_LUJVO_TAIL_OK
 
 ENDBLOCK
+#}}}
+###########################################################
+#{{{  BLOCK CULTURAL_BRIDGE
+BLOCK CULTURAL_BRIDGE
+
+# This block describes the extra NFA states that have to be bridged on top of the
+# standard lujvo to get something that copes with cultural rafsi too.
+
+    STATE in_before_c
+        C -> in # No point using CNR, because N&R can't start initial pair
+
+    STATE in
+        LCI -> in_after_cc
+
+    STATE in_after_cc
+        V ; APOS ; V ; CNR -> exit
+        V ; VV       ; CNR -> exit
+
+ENDBLOCK
+#}}}
 
 ###########################################################
-# Recognize a sequence of cmavo.  There are two exit cases : first is a
-# sequence of 'normal' cmavo; this can potentially be prefixed onto a gismu,
-# lujvo or fu'ivla.  The 2nd may start with some 'normal' cmavo, but ends with
-# one or more cmavo of the Cy form.  This has to occur at the end of the word.
+#{{{  BLOCK CULTURAL_LUJVO
+BLOCK CULTURAL_LUJVO
+
+    # Block to recognise lujvo which have 'cultural' rafsi in them.
+
+    # Obviously this recognises all normal lujvo as well, because it will cope
+    # with >=0 of the rafsi being cultural.  That is not important, because
+    # this case is 'set differenced' away in the priority logic at the end of
+    # the file : if the word's a normal lujvo, it is never considered for
+    # recognition as a cultural one.
+
+    # The core lujvo - replicates the main lujvo matching block.
+    body : LUJVO_BODY
+
+    # The extra bits
+    s1      : CULTURAL_BRIDGE
+    tos_s1  : CULTURAL_BRIDGE
+
+    t0  : CULTURAL_BRIDGE
+    t1  : CULTURAL_BRIDGE
+    t1t : CULTURAL_BRIDGE
+    tos : CULTURAL_BRIDGE
+
+    STATE in -> body.in
+
+    ##############
+    # Add bridging between states arising from cultural rafsi being present.
 
+    STATE body.s1.in -> s1.in_before_c
+    STATE s1.exit    -> body.s1.ex_y
+
+    STATE body.t0.s2.in          -> t0.in
+    STATE body.t0.s2.in_after_cc -> t0.in_after_cc
+    STATE t0.exit                -> body.t0.s2.ex_y
+
+    STATE body.t1.s2.in          -> t1.in
+    STATE body.t1.s2.in_after_cc -> t1.in_after_cc
+    STATE t1.exit                -> body.t1.s2.ex_y
+
+    STATE body.t1t.s2.in          -> t1t.in
+    STATE body.t1t.s2.in_after_cc -> t1t.in_after_cc
+    STATE t1t.exit                -> body.t1t.s2.ex_y
+
+    STATE body.tos.tail.s2.in          -> tos.in
+    STATE body.tos.tail.s2.in_after_cc -> tos.in_after_cc
+    STATE tos.exit                     -> body.tos.tail.s2.ex_y
+    
+    STATE body.tos.s1.in -> tos_s1.in_after_cc
+    STATE tos_s1.exit    -> body.tos.s1.ex_y
+
+    ##############
+
+    # Set exit states on t0
+    STATE body.t0.sn.exit = TAG_CULTURAL_LUJVO_0
+    
+    # Set exit states on t1
+    STATE body.t1.sn.exit = TAG_CULTURAL_LUJVO_1
+        
+    # Set exit states on t1t
+    STATE body.t1t.sn.exit = TAG_CULTURAL_LUJVO_1T
+
+    # Set exit status when potentially shorter word is of valid lujvo form
+    # (e.g. the smabru in tosmabru)
+    STATE body.tos.exit = TAG_CULTURAL_LUJVO_TAIL_OK
+
+ENDBLOCK
+#}}}
+###########################################################
+#{{{  BLOCK CMAVOSEQ
 BLOCK CMAVOSEQ
 
+    # Recognize a sequence of cmavo.  There are two exit cases : first is a
+    # sequence of 'normal' cmavo; this can potentially be prefixed onto a
+    # gismu, lujvo or fu'ivla.  The 2nd may start with some 'normal' cmavo, but
+    # ends with one or more cmavo of the Cy form.  This has to occur at the end
+    # of the word.
+
     STATE in
         V -> m2, mv, in1
         CNR -> main, cy1
@@ -399,34 +489,34 @@
     STATE exit_standalone = TAG_CMAVOS_END_CY
 
 ENDBLOCK
-
-###########################################################
+#}}}
+#{{{  BLOCK GISMU
+BLOCK GISMU
 # Recognize a gismu.  The two cases CVC/CV and CCVCV get different exit
 # statuses; this allows the scanner to back up one potential prefix cmavo in
 # the CVC/CV case. (See how this is much simpler than the lujvo matcher!)
 
-BLOCK GISMU
-
     STATE in
         C     ; LCI ; V     ; CNR ; V = TAG_GISMU_0
         CNR   ; V   ; CNR   ; LCP ; V = TAG_GISMU_1
         
 ENDBLOCK
-
+#}}}
 ###########################################################
-# Recognize a slinku'i
-#
-# This is basically like recognizing a lujvo but with a much reduced state
-# topology, because the letter 'y' can't occur anywhere.  So the final rafsi
-# could be any of the valid forms, however, all earlier ones are restricted to
-# CVV, CVC or CCV.
-# For the first syllable, we jump in as though we've already recognized CV.
-# Although the potential lujvo is always going to start CV, we distinguish the
-# cases based on whether it's fu'ivla_0 or fu'ivla_1 that's going to be
-# squashed by a match, to make sure the fu'ivla NFA and slinku'i NFA are
-# treating the same length word tail as the match string.
-
+#{{{  BLOCK SLINKUI
 BLOCK SLINKUI
+    # Recognize a slinku'i
+    #
+    # This is basically like recognizing a lujvo but with a much reduced state
+    # topology, because the letter 'y' can't occur anywhere.  So the final rafsi
+    # could be any of the valid forms, however, all earlier ones are restricted to
+    # CVV, CVC or CCV.
+    # For the first syllable, we jump in as though we've already recognized CV.
+    # Although the potential lujvo is always going to start CV, we distinguish the
+    # cases based on whether it's fu'ivla_0 or fu'ivla_1 that's going to be
+    # squashed by a match, to make sure the fu'ivla NFA and slinku'i NFA are
+    # treating the same length word tail as the match string.
+
     # SYL2N is a superset of what's needed, because it allows y's.  We'll never
     # check for slinkui unless we find it's a fu'ivla so this won't cause false
     # matches.  Ideally, a custom SYL2N block is required, however using the
@@ -447,13 +537,13 @@
     STATE t0.sn.exit = TAG_SLINKUI_0
 
 ENDBLOCK
-
+#}}}
 ###########################################################
 # Fu'ivla matching blocks (including syllabic consonant rules)
 
-# This describes a valid sequence of vowels within a fu'ivla
-
+#{{{  BLOCK FV_VOWELS
 BLOCK FV_VOWELS
+# This describes a valid sequence of vowels within a fu'ivla
 
     STATE in
         V                    -> ex_single
@@ -468,83 +558,70 @@
                   -> ex_multi
 
 ENDBLOCK
-
-###########################################################
-
+#}}}
+#{{{  BLOCK FV_INITIAL_CLUSTER
 BLOCK FV_INITIAL_CLUSTER
 
     STATE in
-        C ; CI            -> exit
-        C ; CI ; CI       -> exit
-        C ; CSI           -> main
-        C ; CI ; CSI      -> main
-        C ; CI ; CI ; CSI -> main
-
-    STATE main
-                           -> exit
-        CSI                -> main
-        CI ; CSI           -> main
-        CI ; CI ; CSI      -> main
-        CI ; CI ; CI ; CSI -> main
-        CI                 -> exit
-        CI ; CI            -> exit
-        CI ; CI ; CI       -> exit
+        C ; CI                  -> exit
+        C ; CSI                 -> exit
+        C ; CI ; CI             -> exit
+        C ; CI ; CSI            -> exit
+        C ; CI ; CSI ; CSI      -> exit
+        C ; CI ; CI ; CSI ; CSI -> exit
 
 ENDBLOCK
-
-###########################################################
+#}}}
 
 Abbrev FC = C|N|R
 Abbrev FCP = CI|CP|H
 Abbrev FCS = CSI|CS|HS|NR
 
+#{{{  BLOCK FV_INTERNAL_CONS_GROUP
 BLOCK FV_INTERNAL_CONS_GROUP
 
     STATE in
         FC  -> c1
 
     STATE c1
-        FCS -> main
+        FCS -> cs
         FCP -> c2
             -> exit
 
     STATE c2
-        FCS -> main
+        FCS -> cs
         FCP -> c3
             -> exit
 
     STATE c3
-        FCS -> main
-            -> exit
+        FCS ; FCS -> cs
+        FCS ; FCP -> c1
+                  -> exit
 
-    STATE main
-        FCS -> main
+    STATE cs
+        FCS -> cs
         FCP -> c1
             -> exit
-        
+
     STATE in_req_clus
-        FC ; FCS -> main
+        FC ; FCS -> cs
         FC ; FCP -> c2
 
     # Coming in after seeing a stage 3 hyphenation triple
     # ending in a non-syllabic
     STATE in_after_c -> c1
 
-
     # Coming in after seeing a stage 3 hyphenation triple
     # ending in a syllabic
-    STATE in_after_s -> main
-        
-        
+    STATE in_after_s -> cs
 
 ENDBLOCK
-
-###########################################################
+#}}}
+#{{{  BLOCK FUIVLA_START_V
+BLOCK FUIVLA_START_V
 # fu'ivla starting with up to 3 vowels, maybe with apostrophes
 # between them, then a cluster.
 
-BLOCK FUIVLA_START_V
-
     cons    : FV_INTERNAL_CONS_GROUP
     later_v : FV_VOWELS
 
@@ -572,12 +649,14 @@
         -> exit, cons.in
 
 ENDBLOCK
+#}}}
 
 ###########################################################
-# fu'ivla starting with a single consonant and up to 2
-# vowels, maybe with apostrophes between, then a cluster.
 
+#{{{  BLOCK FUIVLA_START_CV
 BLOCK FUIVLA_START_CV
+# fu'ivla starting with a single consonant and up to 2
+# vowels, maybe with apostrophes between, then a cluster.
     cons    : FV_INTERNAL_CONS_GROUP
     later_v : FV_VOWELS
 
@@ -603,9 +682,8 @@
         -> exit, cons.in
 
 ENDBLOCK
-
-###########################################################
-
+#}}}
+#{{{  BLOCK FUIVLA_START_CC
 BLOCK FUIVLA_START_CC
     init_cc : FV_INITIAL_CLUSTER
     early_v : FV_VOWELS
@@ -624,9 +702,8 @@
     STATE later_v.ex_single -> exit, later_c.in
 
 ENDBLOCK
-
-###########################################################
-
+#}}}
+#{{{  BLOCK STAGE3_TAIL
 BLOCK STAGE3_TAIL
 
     later_c : FV_INTERNAL_CONS_GROUP
@@ -639,33 +716,36 @@
         CSI|CS|HS|NR -> after_hyph
 
     STATE after_hyph
-        H  -> later_c.in_after_c
-        HS -> later_c.in_after_s
+        H  -> goto_c
+        HS -> goto_s
+
+    STATE goto_c -> later_c.in_after_c
+    STATE goto_s -> later_c.in_after_s
 
     STATE later_c.exit -> later_v.in
     STATE later_v.ex_multi -> exit, later_c.in
     STATE later_v.ex_single -> exit, later_c.in
 
 ENDBLOCK
-
-###########################################################
-# Recognize a stage-3 fu'ivla starting CVC
-
+#}}}
+#{{{  BLOCK STAGE3_SHORT
 BLOCK STAGE3_SHORT
+# Recognize a stage-3 fu'ivla starting CVC
 
     tail : STAGE3_TAIL
 
     STATE in
         CNR ; V ; CNR -> tail.in
 
+    STATE tail.goto_c (AT_S3_3)
+    STATE tail.goto_s (AT_S3_3)
     STATE tail.exit -> exit
 
 ENDBLOCK
-
-###########################################################
-# Recognize a stage-3 fu'ivla starting CVCC or CCVC
-
+#}}}
+#{{{  BLOCK STAGE3_LONG
 BLOCK STAGE3_LONG
+# Recognize a stage-3 fu'ivla starting CVCC or CCVC
 
     tail : STAGE3_TAIL
 
@@ -673,14 +753,79 @@
         C   ; LCI ; V   ; CNR -> tail.in
         CNR ; V   ; CNR ; LCP -> tail.in
 
+    STATE tail.goto_c (AT_S3_4)
+    STATE tail.goto_s (AT_S3_4)
     STATE tail.exit -> exit
 
 ENDBLOCK
-
+#}}}
 ###########################################################
-# Recognize a fuivla
+# Recognize an "extended" stage-3, i.e. one with multiple
+# rafsi prior to the hyphen.
+
+#{{{ BLOCK X_STAGE3_CC_HEAD
+BLOCK X_STAGE3_CC_HEAD
+    STATE in
+        CNR ; LCI ; V ; CNR -> exit
+ENDBLOCK
+#}}}
+#{{{  BLOCK X_STAGE3_CV_HEAD
+BLOCK X_STAGE3_CV_HEAD
+    STATE in
+        CNR ; V ; APOS ; V -> after_cvv
+        CNR ; V ; VV       -> after_cvv
+        CNR ; V ; CNR      -> after_cvc
+
+    STATE after_cvv
+        R ; CP | CS      -> exit
+        N ; NR           -> exit
+
+    STATE after_cvc
+        CP|CS|NR         -> exit
+ENDBLOCK
+#}}}
+#{{{ BLOCK X_STAGE3_OTHER_RAFSI
+BLOCK X_STAGE3_OTHER_RAFSI
+    STATE in
+        LCI ; V  ; CNR   -> in
+        V   ; VV ; CNR   -> in
+        
+        V   ; CNR ; LCP  -> in
+        
+        V   ; CNR        -> exit3
+        LCI ; V   ; CNR  -> exit4
+        V   ; CNR ; LCP  -> exit4
+ENDBLOCK
+#}}}
+#{{{ BLOCK X_STAGE3 
+BLOCK X_STAGE3
+    cc_head : X_STAGE3_CC_HEAD
+    cv_head : X_STAGE3_CV_HEAD
+    other_rafsi : X_STAGE3_OTHER_RAFSI
+    short_tail : STAGE3_TAIL
+    long_tail  : STAGE3_TAIL
+    
+    STATE in -> cc_head.in, cv_head.in
+    STATE cc_head.exit -> other_rafsi.in
+    STATE cv_head.exit -> other_rafsi.in
+    STATE other_rafsi.exit3 -> short_tail.in
+    STATE other_rafsi.exit4 -> long_tail.in
+    STATE short_tail.exit = TAG_X_STAGE3_CVC
+    STATE long_tail.exit = TAG_X_STAGE3_LONG
+    
+# Add attributes for grabbing hyphen position
+    STATE short_tail.goto_c (AT_XS3_3)
+    STATE short_tail.goto_s (AT_XS3_3)
+    STATE long_tail.goto_c (AT_XS3_4)
+    STATE long_tail.goto_s (AT_XS3_4)
+    
+ENDBLOCK
+#}}}
 
+###########################################################
+#{{{ BLOCK FUIVLA
 BLOCK FUIVLA
+# Recognize a fu'ivla
 
     start_cc : FUIVLA_START_CC
     start_cv : FUIVLA_START_CV
@@ -688,12 +833,15 @@
     slinkui  : SLINKUI
     stage3_short : STAGE3_SHORT
     stage3_long  : STAGE3_LONG
+    xstage3 : X_STAGE3
 
     STATE in_no_prefix -> start_cc.in, start_cv.in, start_v.in,
-                          stage3_short.in, stage3_long.in
+                          stage3_short.in, stage3_long.in,
+                          xstage3.in
                           
     STATE in_prefixed  -> start_cc.in, start_cv.in,
-                          stage3_short.in, stage3_long.in
+                          stage3_short.in, stage3_long.in,
+                          xstage3.in
 
     STATE start_cc.goto_slinkui -> slinkui.in_after_c
 
@@ -704,8 +852,10 @@
     STATE stage3_long.exit = TAG_STAGE3_LONG
 
 ENDBLOCK
-
+#}}}
 ###########################################################
+#{{{ BLOCK CMENE
+BLOCK CMENE
 # Recognize a cmene.  Has to end with consonant, and y is treated like a vowel.
 # Take care with just a y occurring between consonants; the front end returns
 # consonant pair tokens in this case (only real vowels clear the front-end
@@ -719,8 +869,6 @@
 # Note, uppercase validation is also separate.  The front end tracks whether an
 # uppercase letter has been seen, then case-folds the letter.  At the end the
 # condition (had_uppercase & !cmene) implies a bad word.
-
-BLOCK CMENE
     STATE in
         CNR     -> c
         V       -> v
@@ -750,25 +898,27 @@
     STATE exit
         = TAG_CMENE
 ENDBLOCK
-
-###########################################################
-# Top level NFA to recognize a word.
-
+#}}}
+#{{{ BLOCK WORD
 BLOCK WORD
+# Top level NFA to recognize a word.
     gismu : GISMU
     lujvo : LUJVO
+    cultural_lujvo : CULTURAL_LUJVO
     cms : CMAVOSEQ
     cmene : CMENE
     fuivla : FUIVLA
     
-    STATE in                  -> gismu.in, lujvo.in, fuivla.in_no_prefix, cms.in, cmene.in
-    STATE cms.exit_prefixable -> gismu.in, lujvo.in, fuivla.in_prefixed
+    STATE in                  -> gismu.in, lujvo.in, cultural_lujvo.in,
+                                 fuivla.in_no_prefix, cms.in, cmene.in
+    STATE cms.exit_prefixable -> gismu.in, lujvo.in, cultural_lujvo.in,
+                                 fuivla.in_prefixed
 
     #####
     STATE in     # ENTRY STATE NAMED LAST IN FILE
 
 ENDBLOCK
-
+#}}}
 ###########################################################
 # Results definition section
 # This is the priority encoding logic to determine
@@ -783,9 +933,32 @@
 # R_LUJVO_1 above.
 Symbol S_LUJVO_1T = TAG_LUJVO_1T & TAG_LUJVO_TAIL_OK
 Result S_LUJVO_1T -> R_LUJVO_1
+
+# 'cultural' lujvo, i.e. ones containing >=1 cultural rafsi (CCVVCV).
+# These have some simplifications; we know one of these lujvo must contain 'y',
+# so these don't have to enter into checking fu'ivla validity later on.
+
+# Recall that the CULTURAL_LUJVO block matches all standard lujvo too, so
+# these have to be factored out.
+
+# Fortunately, there is direct equivalence between the ordinary and cultural
+# varieties in terms of the 0/1/1T status.
+
+Symbol S_CULTURAL_0 = TAG_CULTURAL_LUJVO_0 & ~TAG_LUJVO_0
+Symbol S_CULTURAL_1 = TAG_CULTURAL_LUJVO_1 & ~TAG_LUJVO_1
+Symbol S_CULTURAL_1T = TAG_CULTURAL_LUJVO_1T & ~TAG_LUJVO_1T
+
+Symbol S_CULTURAL_TAIL_OK = TAG_CULTURAL_LUJVO_TAIL_OK
+
+Result S_CULTURAL_0                        -> R_CULTURAL_LUJVO_0
+Result S_CULTURAL_1  & ~S_CULTURAL_0       -> R_CULTURAL_LUJVO_1
+Result S_CULTURAL_1T & ~S_CULTURAL_TAIL_OK -> R_CULTURAL_BAD_TOSMABRU
+Result S_CULTURAL_1T &  S_CULTURAL_TAIL_OK -> R_CULTURAL_LUJVO_1
+
 Symbol VALID_LUJVO = S_LUJVO_1T | R_LUJVO_1 | R_LUJVO_0
 Symbol VALID_GISMU = TAG_GISMU_0 | TAG_GISMU_1
 Symbol VLG = VALID_LUJVO | VALID_GISMU
+
 Result               ~VLG &  TAG_FUIVLA_0 &  TAG_SLINKUI_0 & ~TAG_FUIVLA_1 -> R_BAD_SLINKUI
 Symbol S_FUIVLA_0  = ~VLG &  TAG_FUIVLA_0 & ~TAG_SLINKUI_0
 Symbol S_FUIVLA_1A = ~VLG &  TAG_FUIVLA_0 &  TAG_SLINKUI_0 &  TAG_FUIVLA_1
@@ -798,10 +971,16 @@
 # particular CVC rafsi impossible, you just have to use its corresponding 4
 # letter form to work around this.)
 
-Symbol ANY_STAGE3  = TAG_STAGE3_LONG | TAG_STAGE3_CVC
+Symbol ANY_STAGE3  = TAG_STAGE3_LONG | TAG_STAGE3_CVC | TAG_X_STAGE3_CVC | TAG_X_STAGE3_LONG
+# Standard stage-3 (1 rafsi + hyphen + fu'ivla tail)
 Result S_FUIVLA_0 &  TAG_STAGE3_LONG -> R_STAGE3_0
 Result S_FUIVLA_1 &  TAG_STAGE3_LONG -> R_STAGE3_1
 Result S_FUIVLA_1 &  TAG_STAGE3_CVC & !TAG_STAGE3_LONG -> R_STAGE3_1_CVC
+# Extended stage-3 (>1 rafsi + hyphen + fu'ivla tail)
+Result S_FUIVLA_0 &  TAG_X_STAGE3_LONG -> R_X_STAGE3_0
+Result S_FUIVLA_0 &  TAG_X_STAGE3_CVC & !TAG_X_STAGE3_LONG -> R_X_STAGE3_0_CVC
+Result S_FUIVLA_1 &  TAG_X_STAGE3_LONG -> R_X_STAGE3_1
+Result S_FUIVLA_1 &  TAG_X_STAGE3_CVC & !TAG_X_STAGE3_LONG -> R_X_STAGE3_1_CVC
 
 Result S_FUIVLA_0 & ~ANY_STAGE3 -> R_STAGE4_0
 Result S_FUIVLA_1 & ~ANY_STAGE3 -> R_STAGE4_1
@@ -815,4 +994,21 @@
 
 # The default case if nothing else matches
 DefResult R_UNKNOWN
+
+# C type of exit values
+Type "enum raw_category"
+
+# C type of attribute table
+Attr Type "enum state_attribute"
+
+# Attribute tags
+Attr Result AT_S3_3
+Attr Result AT_S3_4
+Attr Result AT_XS3_3
+Attr Result AT_XS3_4
+
+# The default attribute if nothing else matches
+Attr DefResult AT_UNKNOWN
+
+# vim:cms=#%s
 
diff -urN jbofihe-0.36/n2d/Makefile jbofihe-0.37/n2d/Makefile
--- jbofihe-0.36/n2d/Makefile	Mon Mar 26 22:03:03 2001
+++ jbofihe-0.37/n2d/Makefile	Thu Jan  1 01:00:00 1970
@@ -1,47 +0,0 @@
-# $Header: /cvs/src/jbofihe/n2d/Makefile,v 1.8 2001/03/18 21:48:10 richard Exp $
-#
-# Makefile for NFA->DFA conversion utility
-#
-# Copyright (C) Richard P. Curnow  2000-2001
-# 
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of version 2 of the GNU General Public License as
-# published by the Free Software Foundation.
-# 
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
-# 
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
-# 
-#
-
-CC=gcc
-#CFLAGS=-g
-#CFLAGS=-O2 -pg
-CFLAGS=-O2
-OBJ = parse.o scan.o n2d.o compnfa.o expr.o tabcompr.o compdfa.o
-
-all : n2d
-
-n2d : $(OBJ)
-	$(CC) $(CFLAGS) -o n2d $(OBJ)
-
-parse.c parse.h : parse.y
-	bison -v -d parse.y
-	mv parse.tab.c parse.c
-	mv parse.tab.h parse.h
-
-parse.o : parse.c n2d.h
-
-scan.c : scan.l
-	flex -t -s scan.l > scan.c
-
-scan.o : scan.c parse.h n2d.h
-
-clean:
-	rm n2d *.o scan.c parse.c parse.h
-
diff -urN jbofihe-0.36/n2d/compdfa.c jbofihe-0.37/n2d/compdfa.c
--- jbofihe-0.36/n2d/compdfa.c	Mon Mar 26 22:03:04 2001
+++ jbofihe-0.37/n2d/compdfa.c	Thu Jan  1 01:00:00 1970
@@ -1,321 +0,0 @@
-/***************************************
-  $Header: /cvs/src/jbofihe/n2d/compdfa.c,v 1.5 2001/03/18 22:19:53 richard Exp $
-
-  Routines for compressing the DFA by commoning-up equivalent states
-  ***************************************/
-
-/* Copyright (C) Richard P. Curnow  2001 */
-/*
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- * 
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- * 
-*/
-
-/*
-  The input to this stage is the 'raw' DFA build from the NFA by the subset
-  construction.  Depending on the style of the NFA, there may be large chunks
-  of the DFA that have equivalent functionality, in terms of resulting in the
-  same exit value for the same sequence of input tokens, but which are reached
-  by different prefixes.  The idea of this stage is to common up such regions,
-  to reduce the size of the DFA and hence the table sizes that are generated.
-
-  Conceptually, the basis of the algorithm is to assign the DFA states to
-  equivalence classes.  If there are N different exit values, there are
-  initially N+1 classes.  All states that can exit with a particular value are
-  placed in a class together, and all non-accepting states are placed together.
-  Now, a pass is made over all pairs of states.  Two states remain equivalent
-  if for each token, their outbound transitions go to states in the same class.
-  If the states do not stay equivalent, the class they were in is split
-  accordingly.  This is repeated again and again until no more bisections
-  occur.
-
-  The algorithm actually used is to assign an ordering to the states based on
-  their current class and outbound transitions.  The states are then sorted.
-  This allows all checking to be done on near-neighbours in the sequence
-  generated by the sort, which brings the execution time down to something
-  finite. 
-
-  */
-
-#include "n2d.h"
-
-static int last_eq_class; /* Next class to assign */
-static int Nt; /* Number of tokens; has to be made static to be visible to comparison fn. */
-
-
-/**** Determine state signatures based on transitions and current classes. ****/
-
-static void
-calculate_signatures(DFANode **seq, DFANode **dfas, int ndfas)
-{
-  unsigned long sig;
-  int i, t;
-
-  for (i=0; i<ndfas; i++) {
-    DFANode *s = seq[i];
-    sig = 0UL;
-    for (t=0; t<Nt; t++) {
-      int di = s->map[t];
-      if (di >= 0) {
-        DFANode *d = dfas[di];
-        int deq_class = d->eq_class;
-      
-        sig = increment(sig, deq_class & 0xf); /* 16 bit pairs in sig */
-      }
-    }
-
-    s->signature = sig;
-  }
-}
-
-/************************* Do full compare on states *************************/
-
-static int
-general_compare(const void *a, const void *b)
-{
-  Castderef (a, const DFANode *, aa);
-  Castderef (b, const DFANode *, bb);
-
-  if (aa->eq_class < bb->eq_class) {
-    return -1;
-  } else if (aa->eq_class > bb->eq_class) {
-    return +1;
-  } else if (aa->signature < bb->signature) {
-    return -1;
-  } else if (aa->signature > bb->signature) {
-    return +1;
-  } else {
-    /* The hard way... */
-    int i;
-    for (i=0; i<Nt; i++) {
-      int am = aa->map[i];
-      int bm = bb->map[i];
-      
-      if      (am < bm) return -1;
-      else if (am > bm) return +1;
-    }
-
-  }
-
-  /* If you get here, the states are still equivalent */
-  return 0;
-
-}
-
-
-/*********************** Do one pass of class splitting ***********************/
-
-static int
-split_classes(DFANode **seq, DFANode **dfas, int ndfas)
-{
-  int i;
-  int had_to_split = 0;
-  
-  calculate_signatures(seq, dfas, ndfas);
-  qsort(seq, ndfas, sizeof(DFANode *), general_compare);
-  
-  seq[0]->new_eq_class = seq[0]->eq_class;
-  
-  for (i=1; i<ndfas; i++) {
-    seq[i]->new_eq_class = seq[i]->eq_class;
-
-    if (seq[i]->eq_class == seq[i-1]->eq_class) {
-      /* May need to split, otherwise states were previously separated anyway
-         */
-      
-      if (general_compare(seq+i, seq+i-1) != 0) {
-        /* Different transition pattern, split existing equivalent class */
-        had_to_split = 1;
-        seq[i]->new_eq_class = ++last_eq_class;
-        if (verbose) fprintf(stderr, "Found %d equivalence classes\r", last_eq_class+1);
-      } else {
-        /* This works even if seq[i-1] was assigned a new class due to
-           splitting from seq[i-2] etc. */
-        seq[i]->new_eq_class = seq[i-1]->new_eq_class;
-      }
-    }
-  }
-
-  /* Set classes to new class values. */
-  for (i=0; i<ndfas; i++) {
-    seq[i]->eq_class = seq[i]->new_eq_class;
-  }
-
-  return had_to_split;
-
-}
-
-
-/************************** Sort based on exit value **************************/
-
-static int
-initial_compare(const void *a, const void *b)
-{
-  Castderef (a, const DFANode *, aa);
-  Castderef (b, const DFANode *, bb);
-
-  if (!aa->result && bb->result) {
-    /* Put all non-accepting states first in sort order */
-    return -1;
-  } else if (aa->result && !bb->result) {
-    return +1;
-  } else if (!aa->result && !bb->result) {
-    return 0;
-  } else {
-    return strcmp(aa->result, bb->result);
-  }
-}
-  
-/******************* Determine initial equivalence classes. *******************/
-
-static void
-assign_initial_classes(DFANode **seq, int ndfas)
-{
-  int i;
-  qsort(seq, ndfas, sizeof(DFANode *), initial_compare);
-  
-  last_eq_class = 0;
-  
-  seq[0]->eq_class = last_eq_class;
-
-  for (i=1; i<ndfas; i++) {
-    if (initial_compare(seq+i-1, seq+i) != 0) {
-      /* Not same as previous entry, assign a new class */
-      seq[i]->eq_class = ++last_eq_class;
-    } else {
-      /* Same class as last entry */
-      seq[i]->eq_class = last_eq_class;
-    }
-  }
-}
-
-/***** Compress the DFA so there is precisely one state in each eq. class *****/
-
-static int
-compress_states(DFANode **dfas, int ndfas)
-{
-  int *reps;
-  int i, j, t;
-  int neqc;
-  int new_index;
-
-  neqc = 1 + last_eq_class;
-
-  /* Array containing which state is the representative of each eq. class.
-     Keep the state which had the lowest array index. */
-  reps = new_array(int, neqc);
-  
-  for (i=0; i<neqc; i++) reps[i] = -1; /* undefined */
-
-  /* Go through DFA states to find the representative of each class. */
-  for (i=0; i<ndfas; i++) {
-    int eqc = dfas[i]->eq_class;
-    if (reps[eqc] < 0) {
-      reps[eqc] = i;
-      dfas[i]->is_rep = 1;
-    } else {
-      dfas[i]->is_rep = 0;
-    }
-  }
-
-  /* Go through DFA states and assign new indices. */
-  for (i=0, new_index=0; i<ndfas; i++) {
-    if (dfas[i]->is_rep) {
-      dfas[i]->new_index = new_index++;
-      if (report) fprintf(report, "Old DFA state %d becomes %d\n", i, dfas[i]->new_index);
-    } else {
-      int eqc = dfas[i]->eq_class;
-      int rep = reps[eqc];
-
-      /* This assignment works because the representative for the class
-         must have been done earlier in the loop. */
-      dfas[i]->new_index = dfas[rep]->new_index;
-
-      if (report) fprintf(report, "Old DFA state %d becomes %d (formerly %d)\n", i, dfas[i]->new_index, rep);
-    }
-  }
-  
-  /* Go through all transitions and fix them up. */
-  for (i=0; i<ndfas; i++) {
-    DFANode *s = dfas[i];
-    for (t=0; t<Nt; t++) {
-      int dest = s->map[t];
-      if (dest >= 0) {
-        s->map[t] = dfas[dest]->new_index;
-      }
-    }
-  }
-
-  /* Go through and crunch the entries in the DFA array, fixing up the indices */
-  for (i=j=0; i<ndfas; i++) {
-    if (dfas[i]->is_rep) {
-      dfas[j] = dfas[i];
-      dfas[j]->index = dfas[j]->new_index;
-      j++;
-    }
-  }
-
-  free(reps);
-  return neqc;
-}
-
-/********** Discard the (now inaccurate) NFA bitmaps from the states **********/
-
-static void
-discard_nfa_bitmaps(DFANode **dfas, int ndfas)
-{
-  int i;
-  for (i=0; i<ndfas; i++) {
-    free(dfas[i]->nfas);
-    dfas[i]->nfas = NULL;
-  }
-  return;
-}
-
-/************************ The main callable interface. ************************/
-
-int
-compress_dfa(DFANode **dfas, int ndfas, int ntokens)
-{
-  DFANode **seq; /* Storage for node sequence */
-  int i;
-  int last_eqc;
-  int had_to_split;
-  int new_ndfas;
-
-  /* Safety net */
-  if (ndfas <= 0) return;
-
-  Nt = ntokens;
-  
-  seq = new_array(DFANode *, ndfas);
-  for (i=0; i<ndfas; i++) {
-    seq[i] = dfas[i];
-  }
-
-  assign_initial_classes(seq, ndfas);
-
-  do {
-    had_to_split = split_classes(seq, dfas, ndfas);
-  } while (had_to_split);
-
-  new_ndfas = compress_states(dfas, ndfas);
-  discard_nfa_bitmaps(dfas, new_ndfas);
-
-  free(seq);
-  return new_ndfas;
-
-}
-
-
diff -urN jbofihe-0.36/n2d/compnfa.c jbofihe-0.37/n2d/compnfa.c
--- jbofihe-0.36/n2d/compnfa.c	Mon Mar 26 22:03:04 2001
+++ jbofihe-0.37/n2d/compnfa.c	Thu Jan  1 01:00:00 1970
@@ -1,350 +0,0 @@
-/***************************************
-  $Header: /cvs/src/jbofihe/n2d/compnfa.c,v 1.6 2001/03/18 22:19:53 richard Exp $
-
-  Routines for compressing the NFA by commoning-up equivalent states
-  ***************************************/
-
-/* Copyright (C) Richard P. Curnow  2001 */
-/*
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- * 
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- * 
-*/
-
-/*
-  Attempt to compress the NFA by finding 'equivalent' states.  Two states are
-  'equivalent' if they have exactly the same set of transitions out of them
-  (i.e. for all possible tokens, the destination state matches between them),
-  _and_ they have identical exit values (if the end of string occurs whilst the
-  automaton is in them.)  A state is eliminated by finding all transitions that
-  go into it, and repointing them to the equivalent one.  It is fairly obvious
-  that after eliminating some states this way, other states may become
-  equivalent, so the process has to be repeated until no further eliminations
-  occur.
-  
-  Note, later on there is a compression pass done on the resulting DFA, which
-  would (I think) subsume what this block does in terms of the final DFA size.
-  However, having NFA compression reduces the runtime of the subset
-  construction, which is useful as that is a major proportion of the total
-  execution time.  Also, this block was written before the DFA compressor came
-  to be, and it's a shame to remove it.
-  
-  */
-
-#include "n2d.h"
-
-struct StateRec {
-  int idx;
-  unsigned long hash;
-};
-
-/**************************** Comparison function ****************************/
-
-static int
-compare_recs(const void *a, const void *b)
-{
-  const struct StateRec *aa = (const struct StateRec *) a;
-  const struct StateRec *bb = (const struct StateRec *) b;
-
-  if (aa->hash < bb->hash) return -1;
-  else if (aa->hash > bb->hash) return +1;
-  else if (aa->idx < bb->idx) return -1;
-  else if (aa->idx > bb->idx) return +1;
-  else return 0;
-}
-
-/********************* Calculate a hash for an NFA state *********************/
-
-static unsigned long
-calc_hash(State *s)
-{
-  unsigned long result = 0UL;
-  Stringlist *sl;
-  int i;
-  char *p;
-  
-  for (i=0; i<s->n_transitions; i++) {
-    Translist *tl = s->ordered_trans[i];
-    unsigned long contrib = (((unsigned long) tl->ds_ref << 8) | (tl->token & 0xff));
-    result = ((result << 7) | (result >> 25)) ^ contrib;
-  }
-
-  /* Don't bother about exit values, never try to common together states that
-     can be accepting (note, these are likely to be sparse in the NFA state
-     set, in contrast to the DFA state set where they may be plentiful). */
-
-  return result;
-
-}
-
-/*** Comparison function for the sort to canonicalise the transition order ***/
-
-static int
-compare_transitions(const void *a, const void *b)
-{
-  const Translist *aa = *(const Translist **) a;
-  const Translist *bb = *(const Translist **) b;
-
-  if      (aa->token < bb->token) return -1;
-  else if (aa->token > bb->token) return +1;
-  else if (aa->ds_ref < bb->ds_ref) return -1;
-  else if (aa->ds_ref > bb->ds_ref) return +1;
-  else return 0;
-}
-
-/*********** Set up canonical transitions structure for all states ***********/
-
-static void
-canonicalise_transitions(Block *b)
-{
-  int N = b->nstates;
-  int i, j;
-
-  for (i=0; i<N; i++) {
-    int nt=0;
-    Translist *tl;
-    State *s = b->states[i];
-    
-    for (tl=s->transitions; tl; tl=tl->next) nt++;
-
-    s->n_transitions = nt;
-    s->ordered_trans = new_array(Translist *, nt);
-    for (j=0, tl=s->transitions; tl; j++, tl=tl->next) {
-      s->ordered_trans[j] = tl;
-    }
-    qsort(s->ordered_trans, nt, sizeof(Translist *), compare_transitions);
-  }
-}
-
-
-/******************** Replace an NFA state by another one ********************/
-
-static void
-squash_state (Block *b, State *squashed_state, State *repl_state)
-{
-  int N = b->nstates;
-  int i,j;
-  int touched;
-  
-  for (i=0; i<N; i++) {
-    /* Redirect transitions in all states */
-    State *s = b->states[i];
-    touched = 0;
-    for (j=0; j<s->n_transitions; j++) {
-      Translist *tl = s->ordered_trans[j];
-      if (tl->ds_ref == squashed_state) {
-        touched = 1;
-        tl->ds_ref = repl_state;
-      }
-    }
-    if (touched) {
-      /* Re-sort the transitions into canonical order, as the pointers
-         affect the order for multiple transitions on the same token */
-      qsort(s->ordered_trans, s->n_transitions, sizeof(Translist *), compare_transitions);
-    }
-  }
-
-  squashed_state->removed = 1;
-
-  return;
-}
-
-/******* Rigorous check whether 2 NFA states have same exit transitions *******/
-
-static int
-check_same_transitions(State *s1, State *s2)
-{
-  int ok = 1;
-
-  int n1, n2, i;
-  n1 = s1->n_transitions;
-  n2 = s2->n_transitions;
-  if (n1 != n2) {
-    /* Not much point looking further! */
-    return 0;
-  }
-
-  for (i=0; i<n1; i++) {
-    Translist *tl1 = s1->ordered_trans[i];
-    Translist *tl2 = s2->ordered_trans[i];
-    if ((tl1->token != tl2->token) ||
-        (tl1->ds_ref != tl2->ds_ref)) {
-      ok = 0;
-      break;
-    }
-  }
-
-  return ok;
-}
-
-
-/********** Try removing NFA states, return whether any were removed **********/
-
-static int
-try_removals(Block *b, struct StateRec *recs)
-{
-  int N = b->nstates;
-  int i, n;
-  int base, here;
-  int removed_any = 0;
-  State *base_state, *here_state;
-
-  n = 0;
-  for (i=0; i<N; i++) {
-    /* Need to cope with states eliminated last time */
-    if (!b->states[i]->removed) {
-      recs[n].idx = i;
-      recs[n].hash = calc_hash(b->states[i]);
-      n++;
-    }
-  }
-
-  qsort(recs, n, sizeof(struct StateRec), compare_recs);
-
-  base = 0;
-  base_state = b->states[recs[base].idx];
-  while ((base < n) && (base_state->exitvals)) {
-    base++;
-    base_state = b->states[recs[base].idx];
-  }
-  
-  here = base + 1;
-  here_state = b->states[recs[here].idx];
-  
-  while (here < n) {
-    if (recs[base].hash == recs[here].hash) {
-      if (!(here_state->exitvals) &&
-          check_same_transitions(base_state, here_state)) {
-
-        if (report) {
-          fprintf(report, "NFA state '%s' replaced by equivalent '%s'\n",
-                  here_state->name, base_state->name);
-        }
-        
-        squash_state(b, here_state, base_state);
-        removed_any = 1;
-      }
-
-      here++;
-      here_state = b->states[recs[here].idx];
-    } else {
-      base = here;
-      base_state = here_state;
-      while ((base < n) && (base_state->exitvals)) {
-        base++;
-        base_state = b->states[recs[base].idx];
-      }
-      here = base + 1;
-      here_state = b->states[recs[here].idx];
-    }
-  }
-
-  return removed_any;
-}
-
-/************** Lexicographic comparison of states by exit value **************/
-
-static int
-compare_states_by_exitval(const void *a, const void *b)
-{
-  const State *aa = *(const State **) a;
-  const State *bb = *(const State **) b;
-  const char *aaa = aa->exitvals->string;
-  const char *bbb = bb->exitvals->string;
-
-  return strcmp(aaa, bbb);
-}
-
-/******* Try to squash accepting states that have the same result value *******/
-
-static void
-compress_accepting_states(Block *b)
-{
-  int N = b->nstates;
-  State **ac_states = new_array(State *, N);
-  int n, i;
-  int base, here;
-  const char *base_str, *here_str;
-  
-  for (i=n=0; i<N; i++) {
-    State *s = b->states[i];
-    if (s->exitvals) {
-      if (s->transitions) {
-        fprintf(stderr, "Internal error : Didn't expect to find an accepting NFA state with transitions\n");
-        exit(2);
-      }
-      if (s->exitvals->next) {
-        fprintf(stderr, "Internal error : Didn't expect to find an accepting NFA state with more than one result\n");
-        exit(2);
-      } 
-
-      ac_states[n++] = s;
-    }
-  }
-
-  /* Sort into ascending order */
-  if (n > 1) {
-    qsort(ac_states, n, sizeof(State *), compare_states_by_exitval);
-
-    base = 0;
-    base_str = ac_states[base]->exitvals->string;
-    here = 1;
-    while (here < n) {
-      here_str = ac_states[here]->exitvals->string;
-      if (!strcmp(base_str, here_str)) {
-        squash_state(b, ac_states[here], ac_states[base]);
-        if (report) {
-          fprintf(report, "Replacing accepting state '%s' by '%s'\n",
-                  ac_states[here]->name, ac_states[base]->name);
-        }
-
-      } else {
-        base = here;
-        base_str = here_str;
-      }
-
-      here++;
-    }
-  } 
-
-  free(ac_states);
-}
-
-/************************** Main callable interface **************************/
-
-/* 'b' is the top level block for the NFA, which defines all the states
-   we need to look at. */
-
-void
-compress_nfa(Block *b)
-{
-  int N = b->nstates;
-  struct StateRec *recs = new_array(struct StateRec, N);
-  int pass, any_removed;
-
-  canonicalise_transitions(b);
-  compress_accepting_states(b);
-  pass = 1;
-
-  do {
-    if (report) fprintf(report, "\nPass %d removing NFA states\n", pass);
-    any_removed = try_removals(b, recs);
-    if (!any_removed) {
-      if (report) fprintf(report, "- none removed on this pass\n\n");
-    }
-    pass++;
-  } while (any_removed);
-
-  free(recs);
-}
diff -urN jbofihe-0.36/n2d/expr.c jbofihe-0.37/n2d/expr.c
--- jbofihe-0.36/n2d/expr.c	Mon Mar 26 22:03:04 2001
+++ jbofihe-0.37/n2d/expr.c	Thu Jan  1 01:00:00 1970
@@ -1,370 +0,0 @@
-/***************************************
-  $Header: /cvs/src/jbofihe/n2d/expr.c,v 1.4 2001/03/18 22:19:53 richard Exp $
-
-  Routines for compressing the NFA by commoning-up equivalent states
-  ***************************************/
-
-/* Copyright (C) Richard P. Curnow  2001 */
-/*
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- * 
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- * 
-*/
-
-/*
-  Handle boolean expressions used to determine the final scanner result
-  from the set of NFA accepting states that are simultaneously active
-  at the end of the scan.
-*/
-
-#include "n2d.h"
-
-enum ExprType {
-  E_AND, E_OR, E_XOR, E_COND, E_NOT, E_WILD, E_SYMBOL
-};
-
-struct Symbol;
-
-struct Expr {
-  enum ExprType type;
-  union {
-    struct { struct Expr *c1, *c2; } and; 
-    struct { struct Expr *c1, *c2; } or; 
-    struct { struct Expr *c1, *c2; } xor; 
-    struct { struct Expr *c1, *c2, *c3; } cond; 
-    struct { struct Expr *c1; } not; 
-    struct { int pad; } wild; 
-    struct { struct Symbol *s; } symbol;
-  } data;
-};
-
-struct Symbol {
-  char *name;
-  int is_expr;
-  union {
-    Expr *e;
-    int val;
-  } data;
-};
-
-struct SymbolList {
-  struct SymbolList *next;
-  struct Symbol *sym;
-};
-
-struct Result {
-  char *result; /* The string to write to the output file */
-  Expr *e;
-};
-
-typedef struct Result Result;
-typedef struct Symbol Symbol;
-typedef struct SymbolList SymbolList;
-
-static SymbolList *symbols = NULL;
-
-static Result *results = NULL;
-static int n_results = 0;
-static int max_results = 0;
-
-/*++++++++++++++++++++
-  ++++++++++++++++++++*/
-
-static void
-add_new_symbol(Symbol *s)
-{
-  SymbolList *nsl = new(SymbolList);
-  nsl->sym = s;
-  nsl->next = symbols;
-  symbols = nsl;
-}
-  
-/*++++++++++++++++++++
-  ++++++++++++++++++++*/
-static void
-grow_results(void)
-{
-  if (n_results == max_results) {
-    max_results += 32;
-    results = resize_array(Result, results, max_results);
-  }
-}
-
-/*++++++++++++++++++++
-  ++++++++++++++++++++*/
-Expr *
-new_wild_expr(void)
-{
-  Expr *r = new(Expr);
-  r->type = E_WILD;
-  return r; 
-}
-
-/*++++++++++++++++++++
-  ++++++++++++++++++++*/
-Expr *
-new_not_expr(Expr *c)
-{
-  Expr *r = new(Expr);
-  r->type = E_NOT;
-  r->data.not.c1 = c;
-  return r; 
-}
-
-/*++++++++++++++++++++
-  ++++++++++++++++++++*/
-Expr *
-new_and_expr(Expr *c1, Expr *c2)
-{
-  Expr *r = new(Expr);
-  r->type = E_AND;
-  r->data.and.c1 = c1;
-  r->data.and.c2 = c2;
-  return r; 
-}
-
-/*++++++++++++++++++++
-  ++++++++++++++++++++*/
-Expr *
-new_or_expr(Expr *c1, Expr *c2)
-{
-  Expr *r = new(Expr);
-  r->type = E_OR;
-  r->data.or.c1 = c1;
-  r->data.or.c2 = c2;
-  return r; 
-}
-
-/*++++++++++++++++++++
-  ++++++++++++++++++++*/
-Expr *
-new_xor_expr(Expr *c1, Expr *c2)
-{
-  Expr *r = new(Expr);
-  r->type = E_XOR;
-  r->data.xor.c1 = c1;
-  r->data.xor.c2 = c2;
-  return r; 
-}
-
-/*++++++++++++++++++++
-  ++++++++++++++++++++*/
-Expr *
-new_cond_expr(Expr *c1, Expr *c2, Expr *c3)
-{
-  Expr *r = new(Expr);
-  r->type = E_COND;
-  r->data.cond.c1 = c1;
-  r->data.cond.c2 = c2;
-  r->data.cond.c3 = c3;
-  return r; 
-}
-
-/*++++++++++++++++++++
-  ++++++++++++++++++++*/
-static Symbol *  
-find_symbol_or_create(char *sym_name)
-{
-  int i;
-  Symbol *s;
-  SymbolList *sl;
-  for (sl=symbols; sl; sl=sl->next) {
-    s = sl->sym;
-    if (!strcmp(s->name, sym_name)) {
-      return s;
-    }
-  }
-  
-  s = new(Symbol);
-  add_new_symbol(s);
-  s->is_expr = 0; /* Until proven otherwise */
-  s->name = new_string(sym_name);
-  return s;
-}
-
-/*****************************************************************/
-/* Return expr for symbol name if it already exist, else create. */
-/*****************************************************************/
-
-Expr *
-new_sym_expr(char *sym_name)
-{
-  Expr *r;
-  Symbol *s;
-
-  s = find_symbol_or_create(sym_name);
-  
-  r = new(Expr);
-  r->type = E_SYMBOL;
-  r->data.symbol.s = s;
-  return r; 
-}
-
-/*++++++++++++++++++++
-  Add a result defn.  If the expr is null, it means build a single expr corr.
-  to the value of the symbol with the same name as the result string.
-  ++++++++++++++++++++*/
-
-void
-define_result(char *string, Expr *e)
-{
-  int i;
-  Result *r;
-
-  grow_results();
-  r = &results[n_results++];
-  r->result = new_string(string);
-  if (e) {
-    r->e = e;
-  } else {
-    Expr *ne;
-    ne = new_sym_expr(string);
-    r->e = ne;
-  }
-
-  return;
-}
-
-/*++++++++++++++++++++
-  Define an entry in the symbol table.
-  ++++++++++++++++++++*/
-  
-void
-define_symbol(char *name, Expr *e)
-{
-  Symbol *s;
-  s = find_symbol_or_create(name);
-  s->data.e = e;
-  s->is_expr = 1;
-  return;
-}
-/*++++++++++++++++++++
-  Define an entry in the symbol table, and a result with the same name.
-  ++++++++++++++++++++*/
-  
-void
-define_symresult(char *name, Expr *e)
-{
-  define_symbol(name, e);
-  define_result(name, e);
-  return;
-}
-
-/*++++++++++++++++++++
-  Flag indicating whether any results evaluated so far have evaluated true.
-  (Used for implementing wildcard expression).
-  ++++++++++++++++++++*/
-
-static int any_results_so_far;
-
-/*++++++++++++++++++++
-  Clear all symbol values.
-  ++++++++++++++++++++*/
-
-void
-clear_symbol_values(void)
-{
-  SymbolList *sl;
-  for (sl=symbols; sl; sl=sl->next) {
-    Symbol *s = sl->sym;
-    if (0 == s->is_expr) {
-      s->data.val = 0;
-    }
-  }
-  any_results_so_far = 0;
-}
-
-/*++++++++++++++++++++
-  Set the value of a symbol
-  ++++++++++++++++++++*/
-
-void
-set_symbol_value(char *sym_name)
-{
-  Symbol *s;
-
-  s = find_symbol_or_create(sym_name);
-  if (s->is_expr) {
-    fprintf(stderr, "Cannot set value for symbol '%s', it is defined by an expression\n");
-    exit(2);
-  } else {
-    s->data.val = 1;
-  }
-}
-
-/*++++++++++++++++++++
-  Evaluate the value of an expr
-  ++++++++++++++++++++*/
-  
-static int
-eval(Expr *e)
-{
-  switch (e->type) {
-    case E_AND:
-      return eval(e->data.and.c1) && eval(e->data.and.c2);
-    case E_OR:
-      return eval(e->data.or.c1) || eval(e->data.or.c2);
-    case E_XOR:
-      return eval(e->data.xor.c1) ^ eval(e->data.xor.c2);
-    case E_COND:
-      return eval(e->data.cond.c1) ? eval(e->data.cond.c2) : eval(e->data.cond.c3);
-    case E_NOT:
-      return !eval(e->data.not.c1);
-    case E_WILD:
-      return any_results_so_far;
-    case E_SYMBOL:
-      {
-        Symbol *s = e->data.symbol.s;
-        if (s->is_expr) {
-          return eval(s->data.e);
-        } else {
-          return s->data.val;
-        }
-      }
-    default:
-      fprintf(stderr, "Interal error : Can't get here!\n");
-      exit(2);
-  }
-}
-
-/*++++++++++++++++++++
-  Evaluate the result which holds given the symbols that are set
-  ++++++++++++++++++++*/
-
-int
-evaluate_result(char **result)
-{
-  int i;
-  int matched = -1;
-  for (i=0; i<n_results; i++) {
-    if (eval(results[i].e)) {
-      if (any_results_so_far) {
-        *result = NULL;
-        return 0;
-      } else {
-        any_results_so_far = 1;
-        matched = i;
-      }
-    }
-  }
-
-  if (matched < 0) {
-    *result = NULL;
-    return 1;
-  } else {
-    *result = results[matched].result;
-    return 1;
-  }
-}
-
diff -urN jbofihe-0.36/n2d/n2d.c jbofihe-0.37/n2d/n2d.c
--- jbofihe-0.36/n2d/n2d.c	Mon Mar 26 22:03:04 2001
+++ jbofihe-0.37/n2d/n2d.c	Thu Jan  1 01:00:00 1970
@@ -1,1395 +0,0 @@
-/***************************************
-  $Header: /cvs/src/jbofihe/n2d/n2d.c,v 1.16 2001/03/18 22:31:28 richard Exp $
-
-  Main program for NFA to DFA table builder program.
-  ***************************************/
-
-/* Copyright (C) Richard P. Curnow  2000-2001 */
-/*
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- * 
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- * 
-*/
-
-/*
-  Convert a nondeterminstic finite automaton (NFA) into a deterministic finite
-  automaton (DFA).
-
-  The NFA is defined in terms of a set of states, with transitions between the
-  states.  The transitions may occur on any one of a set of symbols (specified
-  with | characters between the options), or may be 'epsilon' transitions, i.e.
-  occurring without consumption of any input.  A state may have multiple
-  transitions for the same input symbol (hence 'nondeterministic').  The final
-  state encountered within the final block defined in the input file is taken
-  to be the start state of the whole NFA.  A state may be entered more than
-  once in the file; the transitions in the multiple definitions are combined to
-  give the complete transition set.  A state may have an exit value assigned
-  (with =); this is the return value of the automaton if the end of string is
-  encountered when in that state.  (If the resulting DFA can be in multiple
-  exiting NFA states when the end of string is reached, the result is all the
-  associated NFA exit values or'd together, so it is best to use distinct bits
-  for NFA exit values unless it is known that is safe not to in a particular
-  case.) The input grammar allows a BLOCK <name> ... ENDBLOCK construction +
-  block instantiation.  This allows common parts of the NFA state machine to be
-  reused in multiple places as well as aiding structuring and readability.  See
-  morf_nfa.in for an example of the input grammar, and morf.c for a
-  (non-trivial) example of how to build the automaton around the tables that
-  this script generates.
-*/
-
-#include <ctype.h>
-#include "n2d.h"
-
-/* Globally visible options to control reporting */
-FILE *report;
-int verbose;
-
-static Block **blocks = NULL;
-static int nblocks = 0;
-static int maxblocks = 0;
-
-static char **toktable=NULL;
-static int ntokens = 0;
-static int maxtokens = 0;
-
-struct Abbrev {
-  char *lhs; /* Defined name */
-  char **rhs; /* Token/define */
-  int nrhs;
-  int maxrhs;
-};
-
-static struct Abbrev *abbrevtable=NULL;
-static int nabbrevs = 0;
-static int maxabbrevs = 0;
-
-static char *defresult = "0";
-
-/* ================================================================= */
-
-void
-define_defresult(char *string)
-{
-  defresult = string;
-}
-
-/* ================================================================= */
-
-static void
-grow_tokens(void)
-{
-  maxtokens += 32;
-  toktable = resize_array(char *, toktable, maxtokens);
-}
-
-/* ================================================================= */
-
-static int
-create_token(char *name)
-{
-  int result;
-  if (ntokens == maxtokens) {
-    grow_tokens();
-  }
-  result = ntokens++;
-  toktable[result] = new_string(name);
-  return result;
-}
-
-/* ================================================================= */
-
-int
-lookup_token(char *name, int create)
-{
-  int found = -1;
-  int i;
-  for (i=0; i<ntokens; i++) {
-    if (!strcmp(toktable[i], name)) {
-      found = i;
-      break;
-    }
-  }
-
-  switch (create) {
-    case USE_OLD_MUST_EXIST:
-      if (found < 0) {
-        fprintf(stderr, "Token '%s' was never declared\n", name);
-        exit(1);
-      }        
-      break;
-    case CREATE_MUST_NOT_EXIST:
-      if (found >= 0) {
-        fprintf(stderr, "Token '%s' already declared\n", name);
-        exit(1);
-      } else {
-        found = create_token(name);
-      }
-      break;
-    case CREATE_OR_USE_OLD:
-      if (found < 0) {
-        found = create_token(name);
-      }
-      break;
-  }
-  
-  return found;
-}
-
-/* ================================================================= */
-
-static void
-grow_abbrevs(void)
-{
-  maxabbrevs += 32;
-  abbrevtable = resize_array(struct Abbrev, abbrevtable, maxabbrevs);
-}
-
-/* ================================================================= */
-
-struct Abbrev *
-create_abbrev(char *name)
-{
-  struct Abbrev *result;
-  if (nabbrevs == maxabbrevs) {
-    grow_abbrevs();
-  }
-  result = abbrevtable + (nabbrevs++);
-  result->lhs = new_string(name);
-  result->nrhs = result->maxrhs = 0;
-  result->rhs = 0;
-  return result;
-}
-
-/* ================================================================= */
-
-void
-add_tok_to_abbrev(struct Abbrev *abbrev, char *tok)
-{
-  if (abbrev->nrhs == abbrev->maxrhs) {
-    abbrev->maxrhs += 8;
-    abbrev->rhs = resize_array(char *, abbrev->rhs, abbrev->maxrhs);
-  }
-
-  abbrev->rhs[abbrev->nrhs++] = new_string(tok);
-}
-
-/* ================================================================= */
-
-static struct Abbrev *
-lookup_abbrev(char *name, int create)
-{
-  int found = -1;
-  int i;
-  struct Abbrev *result = NULL;
-  /* Scan table in reverse order.  If a name has been redefined,
-     make sure the most recent definition is picked up. */
-  for (i=nabbrevs-1; i>=0; i--) {
-    if (!strcmp(abbrevtable[i].lhs, name)) {
-      found = i;
-      result = abbrevtable + found;
-      break;
-    }
-  }
-
-  switch (create) {
-    case CREATE_MUST_NOT_EXIST:
-      if (found >= 0) {
-        fprintf(stderr, "Abbreviation '%s' already declared\n", name);
-        exit(1);
-      } else {
-        result = create_abbrev(name);
-      }
-      break;
-    case CREATE_OR_USE_OLD:
-      if (found < 0) {
-        result = create_abbrev(name);
-      }
-      break;
-  }
-  
-  return result;
-}
-
-/* ================================================================= */
-
-static void
-grow_blocks(void)
-{
-  maxblocks += 32;
-  blocks = resize_array(Block*, blocks, maxblocks);
-}
-
-/* ================================================================= */
-
-static Block *
-create_block(char *name)
-{
-  Block *result;
-  int i;
-  
-  if (nblocks == maxblocks) {
-    grow_blocks();
-  }
-  
-#if 0  
-  /* Not especially useful to show this */
-  if (verbose) {
-    fprintf(stderr, " %s", name);
-  }
-#endif
-  
-  result = blocks[nblocks++] = new(Block);
-  result->name = new_string(name);
-  for (i=0; i<HASH_BUCKETS; i++) { 
-    result->state_hash[i].states = NULL;
-    result->state_hash[i].nstates = 0;
-    result->state_hash[i].maxstates = 0;
-  }
-  result->states = NULL;
-  result->nstates = result->maxstates = 0;
-
-  result->subcount = 1;
-  return result;
-}
-
-/* ================================================================= */
-
-
-Block *
-lookup_block(char *name, int create)
-{
-  Block *found = NULL;
-  int i;
-  for (i=0; i<nblocks; i++) {
-    if (!strcmp(blocks[i]->name, name)) {
-      found = blocks[i];
-      break;
-    }
-  }
-
-  switch (create) {
-    case USE_OLD_MUST_EXIST:
-      if (!found) {
-        fprintf(stderr, "Could not find block '%s' to instantiate\n", name);
-        exit(1);
-      }        
-      break;
-    case CREATE_MUST_NOT_EXIST:
-      if (found) {
-        fprintf(stderr, "Already have a block called '%s', cannot redefine\n", name);
-        exit(1);
-      } else {
-        found = create_block(name);
-      }
-      break;
-    case CREATE_OR_USE_OLD:
-      if (!found) {
-        found = create_block(name);
-      }
-      break;
-  }
-  
-  return found;
-}
-
-/* ================================================================= */
-  
-static void
-maybe_grow_states(Block *b, int hash)
-{
-  Stateset *ss = b->state_hash + hash;
-  if (ss->nstates == ss->maxstates) {
-    ss->maxstates += 8;
-    ss->states = resize_array(State*, ss->states, ss->maxstates);
-  }
-  if (b->nstates == b->maxstates) {
-    b->maxstates += 32;
-    b->states = resize_array(State*, b->states, b->maxstates);
-  }
-  
-}
-
-/* ================================================================= */
-
-static unsigned long
-hashfn(const char *s)
-{
-  unsigned long y = 0UL, v, w, x, k;
-  unsigned long yl, yh;
-  const char *t = s;
-  while (1) {
-    k = (unsigned long) *(unsigned char *)(t++);
-    if (!k) break;
-    v = ~y;
-    w = y<<13;
-    x = v>>6;
-    y = w ^ x;
-    y += k;
-  }
-  y ^= (y>>13);
-  y &= HASH_MASK;
-  return y;
-}
-
-/* ================================================================= */
-
-static State *
-create_state(Block *b, char *name)
-{
-  State *result;
-  int hash;
-  Stateset *ss;
-  hash = hashfn(name);
-  maybe_grow_states(b, hash);
-  ss = b->state_hash + hash;
-  result = b->states[b->nstates++] = ss->states[ss->nstates++] = new(State);
-  result->name = new_string(name);
-  result->parent = b;
-  result->index = b->nstates - 1;
-  result->transitions = NULL;
-  result->exitvals = NULL;
-  result->ordered_trans = NULL;
-  result->n_transitions = 0;
-  result->removed = 0;
-  return result;
-}
-
-/* ================================================================= */
-
-State *
-lookup_state(Block *b, char *name, int create)
-{
-  State *found = NULL;
-  int i;
-  int hash;
-  Stateset *ss;
-
-  hash = hashfn(name);
-  ss = b->state_hash + hash;
-  
-  for (i=0; i<ss->nstates; i++) {
-    if (!strcmp(ss->states[i]->name, name)) {
-      found = ss->states[i];
-      break;
-    }
-  }
-
-  switch (create) {
-    case USE_OLD_MUST_EXIST:
-      if (!found) {
-        fprintf(stderr, "Could not find a state '%s' in block '%s' to transition to\n", name, b->name);
-        exit(1);
-      }        
-      break;
-    case CREATE_MUST_NOT_EXIST:
-      if (found) {
-        fprintf(stderr, "Warning : already have a state '%s' in block '%s'\n", name, b->name);
-      } else {
-        found = create_state(b, name);
-      }
-      break;
-    case CREATE_OR_USE_OLD:
-      if (!found) {
-        found = create_state(b, name);
-      }
-      break;
-  }
-  
-  return found;
-}
-
-/* ================================================================= */
-  
-Stringlist *
-add_token(Stringlist *existing, char *token)
-{
-  Stringlist *result = new(Stringlist);
-  if (token) {
-    result->string = new_string(token);
-  } else {
-    result->string = NULL;
-  }
-  result->next = existing;
-  return result;
-}
-
-/* ================================================================= */
-/* Add a single transition to the state.  Allow definitions to be
-   recursive */
-
-static void
-add_transition(State *curstate, char *str, char *destination)
-{
-  struct Abbrev *abbrev;
-  abbrev = (str) ? lookup_abbrev(str, USE_OLD_MUST_EXIST) : NULL;
-  if (abbrev) {
-    int i;
-    for (i=0; i<abbrev->nrhs; i++) {
-      add_transition(curstate, abbrev->rhs[i], destination);
-    }
-  } else {
-    Translist *tl;
-    tl = new(Translist);
-    tl->next = curstate->transitions;
-    /* No problem with aliasing, these strings are read-only and have
-       lifetime = until end of program */
-    tl->token = (str) ? lookup_token(str, USE_OLD_MUST_EXIST) : -1;
-    tl->ds_name = destination;
-    curstate->transitions = tl;
-  }
-}
-
-/* ================================================================= */
-
-void
-add_transitions(State *curstate, Stringlist *tokens, char *destination)
-{
-  Stringlist *sl;
-  struct Abbrev *abbrev;
-  for (sl=tokens; sl; sl=sl->next) {
-    add_transition(curstate, sl->string, destination);
-  }
-}
-
-/* ================================================================= */
-
-State *
-add_transitions_to_internal(Block *curblock, State *addtostate, Stringlist *tokens)
-{
-  char buffer[1024];
-  State *result;
-  sprintf(buffer, "#%d", curblock->subcount++);
-  result = lookup_state(curblock, buffer, CREATE_MUST_NOT_EXIST);
-  add_transitions(addtostate, tokens, result->name);
-  return result;
-}
-
-
-/* ================================================================= */
-
-void
-add_exit_value(State *curstate, char *value)
-{
-  Stringlist *sl;
-  sl = new(Stringlist);
-  sl->string = value;
-  sl->next = curstate->exitvals;
-  curstate->exitvals = sl;
-}
-
-/* ================================================================= */
-
-void
-instantiate_block(Block *curblock, char *block_name, char *instance_name)
-{
-  Block *master = lookup_block(block_name, USE_OLD_MUST_EXIST);
-  char namebuf[1024];
-  int i;
-  for (i=0; i<master->nstates; i++) {
-    State *s = master->states[i];
-    State *new_state;
-    Translist *tl;
-    Stringlist *sl, *ex;
-    
-    strcpy(namebuf, instance_name);
-    strcat(namebuf, ".");
-    strcat(namebuf, s->name);
-    
-    /* In perverse circumstances, we might already have a state called this */
-    new_state = lookup_state(curblock, namebuf, CREATE_OR_USE_OLD);
-    
-    for (tl=s->transitions; tl; tl=tl->next) {
-      Translist *new_tl = new(Translist);
-      new_tl->token = tl->token;
-      strcpy(namebuf, instance_name);
-      strcat(namebuf, ".");
-      strcat(namebuf, tl->ds_name);
-      new_tl->ds_name = new_string(namebuf);
-      new_tl->ds_ref = NULL;
-      new_tl->next = new_state->transitions;
-      new_state->transitions = new_tl;
-    }
-    
-    ex = NULL;
-    for (sl=s->exitvals; sl; sl=sl->next) {
-      Stringlist *new_sl = new(Stringlist);
-      new_sl->string = sl->string;
-      new_sl->next = ex;
-      ex = new_sl;
-    }
-    new_state->exitvals = ex;
-        
-    
-  }
-}
-
-/* ================================================================= */
-
-void
-fixup_state_refs(Block *b)
-{
-  int i;
-  for (i=0; i<b->nstates; i++) {
-    State *s = b->states[i];
-    Translist *tl;
-    for (tl=s->transitions; tl; tl=tl->next) {
-      tl->ds_ref = lookup_state(b, tl->ds_name, CREATE_OR_USE_OLD);
-    }
-  }
-}
-
-/* ================================================================= */
-
-/* Bitmap to contain epsilon closure for NFA */
-
-static unsigned long **eclo;
-
-
-/* ================================================================= */
-
-static inline const int
-round_up(const int x) {
-  return (x+31)>>5;
-}
-
-/* ================================================================= */
-
-static inline void
-set_bit(unsigned long *x, int n)
-{
-  int r = n>>5;
-  unsigned long m = 1UL<<(n&31);
-  x[r] |= m;
-}
-
-/* ================================================================= */
-
-static inline int
-is_set(unsigned long *x, int n)
-{
-  int r = n>>5;
-  unsigned long m = 1UL<<(n&31);
-  return !!(x[r] & m);
-}
-
-/* ================================================================= */
-/* During the algorithm to transitively close the epsilon closure table,
-   maintain a stack of indices that have to be rescanned.  This avoids the slow
-   approach of repeatedly rescanning the whole table until no changes are
-   found. */
-
-typedef struct IntPair {
-  struct IntPair *next;
-  int i;
-  int j;
-} IntPair;
-
-static IntPair *freelist=NULL;
-static IntPair *stack=NULL;
-
-/* ================================================================= */
-
-static void
-push_pair(int i, int j)
-{
-  static const int grow_by = 32;
-  IntPair *np;
-  
-  if (!freelist) {
-    IntPair *ip = new_array(IntPair, grow_by);
-    int x;
-    for (x=1; x<grow_by; x++) {
-      ip[x].next = &ip[x-1];
-    }
-    ip[0].next = NULL;
-    freelist = &ip[grow_by-1];
-  }
-  np = freelist;
-  freelist = freelist->next;
-  np->next = stack;
-  stack = np;
-  np->i = i;
-  np->j = j;
-}
-
-
-/* ================================================================= */
-
-static int
-pop_pair(int *i, int *j) {
-  IntPair *ip;
-  if (!stack) {
-    return 0;
-  } else {
-    ip = stack;
-    *i = ip->i;
-    *j = ip->j;
-    stack = ip->next;
-    ip->next = freelist;
-    freelist = ip;
-    return 1;
-  }
-}
-
-/* ================================================================= */
-
-static void
-generate_epsilon_closure(Block *b)
-{
-  int i, j, N;
-  
-  N = b->nstates;
-  eclo = new_array(unsigned long*, N);
-  for (i=0; i<N; i++) {
-    eclo[i] = new_array(unsigned long, round_up(N));
-    for (j=0; j<round_up(N); j++) {
-      eclo[i][j] = 0;
-    }
-  }
-
-  /* Determine initial immediate transitions */
-  for (i=0; i<N; i++) {
-    State *s = b->states[i];
-    Translist *tl;
-    int from_state = s->index;
-    set_bit(eclo[from_state], from_state); /* Always reflexive */
-    
-    for (tl=s->transitions; tl; tl=tl->next) {
-      if (tl->token < 0) { /* epsilon trans */
-        int to_state = tl->ds_ref->index;
-        set_bit(eclo[from_state], to_state);
-        push_pair(from_state, to_state);
-      }
-    }
-  }
-
-  /* Now keep on processing until the table is transitively closed */
-  while (pop_pair(&i, &j)) {
-    int k;
-    for (k=0; k<N; k++) {
-      if (is_set(eclo[j], k) && !is_set(eclo[i], k)) {
-        set_bit(eclo[i], k);
-        push_pair(i,k);
-      }
-    }
-  }
-}
-
-/* ================================================================= */
-
-static void
-print_nfa(Block *b)
-{
-  int i, j, N;
-  N = b->nstates;
-  
-  if (!report) return;
-
-  for (i=0; i<N; i++) {
-    State *s = b->states[i];
-    Translist *tl;
-    Stringlist *sl;
-    fprintf(report, "NFA state %d = %s\n", i, s->name);
-    for (tl=s->transitions; tl; tl=tl->next) {
-      fprintf(report, "  [%s] -> %s\n",
-              (tl->token >= 0) ? toktable[tl->token] : "(epsilon)",
-              tl->ds_name);
-    }
-    if (s->exitvals) {
-      int first = 1;
-      if (report) fprintf(report, "  Exit value : ");
-      for (sl=s->exitvals; sl; sl=sl->next) {
-        fprintf(report, "%s%s",
-                first ? "" : "|",
-                s->exitvals->string);
-      }
-      fprintf(report, "\n");
-    }
-    fprintf(report, "  Epsilon closure :\n    (self)\n");
-    for (j=0; j<N; j++) {
-      if (i!=j && is_set(eclo[i], j)) {
-        fprintf(report, "    %s\n", b->states[j]->name);
-      }
-    }
-    
-    fprintf(report, "\n");
-  }
-
-}
-
-/* ================================================================= */
-
-/* Indexed [from_state][token][to_state], flag set if there is
-   a transition from from_state to to_state, via token then zero or more
-   epsilon transitions */
-
-static unsigned long ***transmap;
-
-/* Index [from_nfa_state][token], flag set if there is a transition
-   to any destination nfa state for that token. */
-static unsigned long **anytrans;
-
-/* ================================================================= */
-
-static void
-build_transmap(Block *b)
-{
-  int N = b->nstates;
-  int Nt = ntokens;
-  int i, j, k, m;
-  
-  transmap = new_array(unsigned long **, N);
-  anytrans = new_array(unsigned long *, N);
-  for (i=0; i<N; i++) {
-    transmap[i] = new_array(unsigned long *, Nt);
-    anytrans[i] = new_array(unsigned long, round_up(Nt));
-    for (j=0; j<round_up(Nt); j++) {
-      anytrans[i][j] = 0UL;
-    }
-    for (j=0; j<Nt; j++) {
-      transmap[i][j] = new_array(unsigned long, round_up(N));
-      for (k=0; k<round_up(N); k++) {
-        transmap[i][j][k] = 0UL;
-      }
-    }
-  }
-
-  for (i=0; i<N; i++) {
-    State *s = b->states[i];
-    Translist *tl;
-    for (tl=s->transitions; tl; tl=tl->next) {
-      if (tl->token >= 0) {
-        int dest = tl->ds_ref->index;
-        for (m=0; m<round_up(N); m++) {
-          unsigned long x = eclo[dest][m];
-          transmap[i][tl->token][m] |= x;
-          if (!!x) set_bit(anytrans[i], tl->token);
-        }
-      }
-    }
-  }
-
-  
-}
-
-/* ================================================================= */
-
-static DFANode **dfas;
-static int ndfa=0;
-static int maxdfa=0;
-
-static int had_ambiguous_result = 0;
-
-/* ================================================================= */
-
-/* Implement an array of linked lists to access DFA states directly.  The
- * hashes are given by folding the signatures down to single bytes. */
-
-struct DFAList {
-  struct DFAList *next;
-  DFANode *dfa;
-};
-
-#define DFA_HASHSIZE 256
-static struct DFAList *dfa_hashtable[DFA_HASHSIZE];
-
-/* ================================================================= */
-
-static void
-grow_dfa(void)
-{ 
-  maxdfa += 32;
-  dfas = resize_array(DFANode*, dfas, maxdfa);
-}
-
-/* ================================================================= */
-
-static unsigned long
-fold_signature(unsigned long sig)
-{
-  unsigned long folded;
-  folded = sig ^ (sig >> 16);
-  folded ^= (folded >> 8);
-  folded &= 0xff;
-  return folded;
-}
-
-
-/* ================================================================= */
-/* Simple linear search.  Use 'signatures' to get rapid rejection
-   of any DFA state that can't possibly match */
-
-static int
-find_dfa(unsigned long *nfas, int N)
-{
-  int res=-1;
-  int i, j;
-  unsigned long signature = 0UL;
-  unsigned long folded_signature;
-  struct DFAList *dfal;
-
-  for (j=0; j<round_up(N); j++) {
-    signature ^= nfas[j];
-  }
-  folded_signature = fold_signature(signature);
-  
-  for(dfal=dfa_hashtable[folded_signature]; dfal; dfal = dfal->next) {
-    DFANode *dfa = dfal->dfa;
-    int matched;
-
-    if (signature != dfa->signature) continue;
-    
-    matched=1;
-
-    for (j=0; j<round_up(N); j++) {
-      if (nfas[j] != dfa->nfas[j]) {
-        matched = 0;
-        break;
-      }
-    }
-    if (matched) {
-      return dfa->index;
-    }
-  }
-  return -1;
-}
-
-/* ================================================================= */
-
-static int
-add_dfa(Block *b, unsigned long *nfas, int N, int Nt)
-{
-  int j;
-  int result = ndfa;
-  int had_exitvals;
-  int this_result_unambiguous;
-  
-  Stringlist *ex;
-  unsigned long signature = 0UL, folded_signature;
-  struct DFAList *dfal;
-
-  if (verbose) {
-    fprintf(stderr, "Adding DFA state %d\r", ndfa);
-    fflush(stderr);
-  }
-
-  if (maxdfa == ndfa) {
-    grow_dfa();
-  }
-
-  dfas[ndfa] = new(DFANode);
-  dfas[ndfa]->nfas = new_array(unsigned long, round_up(N));
-  dfas[ndfa]->map = new_array(int, Nt);
-  dfas[ndfa]->index = ndfa;
-  dfas[ndfa]->defstate = -1;
-
-  for (j=0; j<round_up(N); j++) {
-    unsigned long x = nfas[j];
-    signature ^= x;
-    dfas[ndfa]->nfas[j] = x;
-  }
-  dfas[ndfa]->signature = signature;
-  
-  folded_signature = fold_signature(signature);
-  dfal = new(struct DFAList);
-  dfal->dfa = dfas[ndfa];
-  dfal->next = dfa_hashtable[folded_signature];
-  dfa_hashtable[folded_signature] = dfal;
-
-  ex = NULL;
-  had_exitvals = 0;
-  clear_symbol_values();
-  for (j=0; j<N; j++) {
-    if (is_set(dfas[ndfa]->nfas, j)) {
-      Stringlist *sl;
-      State *s = b->states[j];
-      for (sl = s->exitvals; sl; sl = sl->next) {
-        Stringlist *new_sl;
-        new_sl = new(Stringlist);
-        new_sl->string = sl->string;
-        new_sl->next = ex;
-        ex = new_sl;
-
-        set_symbol_value(sl->string);
-        had_exitvals = 1;
-      }
-    }
-  }
-  
-  this_result_unambiguous = evaluate_result(&dfas[ndfa]->result);
-  dfas[ndfa]->nfa_sl = ex;
-
-  if (!this_result_unambiguous) {
-    Stringlist *sl;
-    fprintf(stderr, "WARNING : Ambiguous exit state abandoned for DFA state %d\n", ndfa);
-    fprintf(stderr, "NFA exit tags applying in this stage :\n");
-    for (sl = ex; sl; sl = sl->next) {
-      fprintf(stderr, "  %s\n", sl->string);
-    }
-    had_ambiguous_result = 1;
-  }
-        
-  ndfa++;
-  return result;
-}
-
-/* ================================================================= */
-
-static void
-clear_nfas(unsigned long *nfas, int N)
-{
-  int i;
-  for (i=0; i<round_up(N); i++) {
-    nfas[i] = 0;
-  }
-}
-
-/* ================================================================= */
-
-static void
-build_dfa(Block *b, int start_index)
-{
-  unsigned long **nfas;
-  int i;
-  int N, Nt;
-  int next_to_do;
-  int *found_any;
-  int rup_N;
-
-  for (i=0; i<DFA_HASHSIZE; i++) dfa_hashtable[i] = NULL;
-  
-  N = b->nstates;
-  rup_N = round_up(N);
-  Nt = ntokens;
-  
-  /* Add initial state */
-  nfas = new_array(unsigned long *, Nt);
-  for (i=0; i<Nt; i++) {
-    nfas[i] = new_array(unsigned long, round_up(N));
-  }
-  clear_nfas(nfas[0], N);
-  for (i=0; i<round_up(N); i++) {
-    nfas[0][i] |= eclo[start_index][i];
-  }
-  add_dfa(b, nfas[0], N, Nt);
-  next_to_do = 0;
-  found_any = new_array(int, Nt);
-
-  /* Now the heart of the program : the subset construction to turn the NFA
-     into a DFA.  This is a major performance hog in the program, so there are
-     lots of tricks to speed this up (particularly, hoisting intermediate
-     pointer computations out of the loop to assert the fact that there is no
-     aliasing between the arrays.) */
-
-  while (next_to_do < ndfa) {
-
-    int t; /* token index */
-    int j0, j0_5, j1, j, mask, k, m;
-    int idx;
-    unsigned long *current_nfas;
-    unsigned long block_bitmap;
-    
-    for (j=0; j<Nt; j++) {
-      clear_nfas(nfas[j], N);
-      found_any[j] = 0;
-    }
-
-    current_nfas = dfas[next_to_do]->nfas;
-    for (j0=0; j0<rup_N; j0++) { /* Loop over NFA states which may be in this DFA state */
-      block_bitmap = current_nfas[j0];
-      if (!block_bitmap) continue;
-      j0_5 = j0 << 5;
-      for (mask=1UL, j1=0; j1<32; mask<<=1, j1++) {
-        j = j0_5 + j1;
-        if (block_bitmap & mask) { /* Is NFA state in DFA */
-          unsigned long **transmap_j = transmap[j];
-          unsigned long *anytrans_j = anytrans[j];
-          for (t=0; t<Nt; t++) { /* Loop over transition symbols */
-            unsigned long *transmap_t;
-            unsigned long *nfas_t;
-            unsigned long found_any_t;
-            if (!is_set(anytrans_j, t)) continue;
-            transmap_t = transmap_j[t];
-            nfas_t = nfas[t];
-            found_any_t = found_any[t];
-            for (k=0; k<rup_N; k++) { /* Loop over destination NFA states */
-              unsigned long x;
-              x = transmap_t[k];
-              nfas_t[k] |= x;
-              found_any_t |= !!x;
-            }
-            found_any[t] = found_any_t;
-          }
-        }
-      }
-    }
-          
-    for (t=0; t<Nt; t++) {
-      if (found_any[t]) {
-        idx = find_dfa(nfas[t], N);
-        if (idx < 0) {
-          idx = add_dfa(b, nfas[t], N, Nt);
-        }
-      } else {
-        idx = -1;
-      }
-      dfas[next_to_do]->map[t] = idx;
-    }
-
-    next_to_do++;
-  }
-
-  free(found_any);
-  for (i=0; i<Nt; i++) free(nfas[i]);
-  free(nfas);
-}
-
-/* ================================================================= */
-
-static void
-print_dfa(Block *b)
-{
-  int N = b->nstates;
-  int Nt = ntokens;
-  
-  int i, j, j0, j0_5, j1, t;
-  unsigned long mask;
-  unsigned long current_nfas;
-  int rup_N = round_up(N);
-  Stringlist *ex;
-
-  if (!report) return;
-  
-  for (i=0; i<ndfa; i++) {
-    fprintf(report, "DFA state %d\n", i);
-    if (dfas[i]->nfas) {
-      fprintf(report, "  NFA states :\n");
-      for (j0=0; j0<rup_N; j0++) {
-        current_nfas = dfas[i]->nfas[j0];
-        if (!current_nfas) continue;
-        j0_5 = j0<<5;
-        for (j1=0, mask=1UL; j1<32; mask<<=1, j1++) {
-          if (current_nfas & mask) {
-            fprintf(report, "    %s\n", b->states[j0_5 + j1]->name);
-          }
-        }
-      }
-      fprintf(report, "\n");
-    }
-    fprintf(report, "  Transitions :\n");
-    for (t=0; t<Nt; t++) {
-      int dest = dfas[i]->map[t];
-      if (dest >= 0) {
-        fprintf(report, "    %s -> %d\n", toktable[t], dest);
-      }
-    }
-    if (dfas[i]->defstate >= 0) {
-      fprintf(report, "  Use state %d as basis (%d fixups)\n",
-              dfas[i]->defstate, dfas[i]->best_diff);
-    }
-    if (dfas[i]->result) {
-      fprintf(report, "  Exit value : %s\n", dfas[i]->result);
-    }
-    
-    fprintf(report, "\n");
-  }
-}
-
-/* ================================================================= */
-/* Emit the exit value table. */
-
-static void
-print_exitval_table(Block *b)
-{
-  int N = b->nstates;
-  int Nt = ntokens;
-  int n, i, j;
-  extern char *prefix;
-  char ucprefix[1024];
-
-  if (prefix) {
-    printf("static short %s_exitval[] = {\n", prefix);
-  } else {
-    printf("static short exitval[] = {\n");
-  }
-  for (i=0; i<ndfa; i++) {
-    printf("%s", (dfas[i]->result) ? dfas[i]->result : defresult);
-    putchar ((i<(ndfa-1)) ? ',' : ' ');
-    printf(" /* State %d */\n", i);
-  }
-  printf("};\n\n");
-}
-
-/* ================================================================= */
-/* Print out the state/transition table uncompressed, i.e. every
-   token has an array entry in every state.  This is fast to access
-   but quite wasteful on memory with many states and many tokens. */
-
-static void
-print_uncompressed_tables(Block *b)
-{
-  int N = b->nstates;
-  int Nt = ntokens;
-  int n, i, j;
-  extern char *prefix;
-  char ucprefix[1024];
-
-  n = 0;
-  if (prefix) {
-    printf("static short %s_trans[] = {", prefix);
-  } else {
-    printf("static short trans[] = {");
-  }
-  for (i=0; i<ndfa; i++) {
-    for (j=0; j<Nt; j++) {
-      if (n>0) putchar (',');
-      if (n%8 == 0) {
-        printf("\n  ");
-      } else {
-        putchar(' ');
-      }
-      n++;
-      printf("%4d", dfas[i]->map[j]);
-    }
-  }
-
-  printf("\n};\n\n");
-
-  if (prefix) {
-    char *p;
-    strcpy(ucprefix, prefix);
-    for (p=ucprefix; *p; p++) {
-      *p = toupper(*p);
-    }
-    printf("#define NEXT_%s_STATE(s,t) %s_trans[%d*(s)+(t)]\n",
-           ucprefix, prefix, Nt);
-  } else {
-    printf("#define NEXT_STATE(s,t) trans[%d*(s)+(t)]\n", Nt);
-  }
-}
-
-/* ================================================================= */
-
-static int
-check_include_char(int this_state, int token)
-{
-  if (dfas[this_state]->defstate >= 0) {
-    return (dfas[this_state]->map[token] !=
-            dfas[dfas[this_state]->defstate]->map[token]);
-  } else {
-    return (dfas[this_state]->map[token] >= 0);
-  }
-}
-
-/* ================================================================= */
-/* Print state/transition table in compressed form.  This is more
-   economical on storage, but requires a bisection search to find
-   the next state for a given current state & token */
-
-static void
-print_compressed_tables(Block *b)
-{
-  int N = b->nstates;
-  int *basetab = new_array(int, ndfa+1);
-  int Nt = ntokens;
-  int n, i, j;
-  extern char *prefix;
-
-
-  n = 0;
-  if (prefix) {
-    printf("static unsigned char %s_token[] = {", prefix);
-  } else {
-    printf("static unsigned char token[] = {");
-  }
-  for (i=0; i<ndfa; i++) {
-    for (j=0; j<Nt; j++) {
-      if (check_include_char(i, j)) {
-        if (n>0) putchar (',');
-        if (n%8 == 0) {
-          printf("\n  ");
-        } else {
-          putchar(' ');
-        }
-        n++;
-        printf("%3d", j);
-      }
-    }
-  }
-  printf("\n};\n\n");
-
-  n = 0;
-  if (prefix) {
-    printf("static short %s_nextstate[] = {", prefix);
-  } else {
-    printf("static short nextstate[] = {");
-  }
-  for (i=0; i<ndfa; i++) {
-    basetab[i] = n;
-    for (j=0; j<Nt; j++) {
-      if (check_include_char(i, j)) {
-        if (n>0) putchar (',');
-        if (n%8 == 0) {
-          printf("\n  ");
-        } else {
-          putchar(' ');
-        }
-        n++;
-        printf("%5d", dfas[i]->map[j]);
-      }
-    }
-  }
-  printf("\n};\n\n");
-  basetab[ndfa] = n;
-
-  n = 0;
-  if (prefix) {
-    printf("static unsigned short %s_base[] = {", prefix);
-  } else {
-    printf("static unsigned short base[] = {");
-  }
-  for (i=0; i<=ndfa; i++) {
-    if (n>0) putchar (',');
-    if (n%8 == 0) {
-      printf("\n  ");
-    } else {
-      putchar(' ');
-    }
-    n++;
-    printf("%5d", basetab[i]);
-  }
-  printf("\n};\n\n");
-  
-  n = 0;
-  if (prefix) {
-    printf("static short %s_defstate[] = {", prefix);
-  } else {
-    printf("static short defstate[] = {");
-  }
-  for (i=0; i<ndfa; i++) {
-    if (n>0) putchar (',');
-    if (n%8 == 0) {
-      printf("\n  ");
-    } else {
-      putchar(' ');
-    }
-    n++;
-    printf("%5d", dfas[i]->defstate);
-  }
-  printf("\n};\n\n");
-
-  
-  free(basetab);
-}
-
-/* ================================================================= */
-
-void yyerror (char *s)
-{
-  extern int lineno;
-  fprintf(stderr, "%s at line %d\n", s, lineno);
-}
-
-/* ================================================================= */
-
-int yywrap(void) { return -1; }
-
-/* ================================================================= */
-
-int main (int argc, char **argv)
-{
-  int result;
-  State *start_state;
-  Block *main_block;
-
-  char *report_name = NULL;
-  verbose = 0;
-  report = NULL;
-
-  /* Parse cmd line arguments */
-  while (++argv, --argc) {
-    if (!strcmp(*argv, "-v")) {
-      verbose = 1;
-    } else if (!strcmp(*argv, "-r")) {
-      ++argv, --argc;
-      report_name = *argv;
-    }
-  }
-
-  if (report_name) {
-    report = fopen(report_name, "w");
-    if (!report) {
-      fprintf(stderr, "Can't open %s for writing, no report will be created\n", report_name);
-    }
-  }
-
-  if (verbose) {
-    fprintf(stderr, "General-purpose automaton builder\n");
-    fprintf(stderr, "Copyright (C) Richard P. Curnow  2000-2001\n");
-  }
-  
-  if (verbose) fprintf(stderr, "Parsing input...");
-  result = yyparse();
-  if (result > 0) exit(1);
-  if (verbose) fprintf(stderr, "\n");
-
-  start_state = get_curstate(); /* The last state to be current in the input file is the entry state of the NFA */
-  main_block = start_state->parent;
-  if (verbose) fprintf(stderr, "Computing epsilon closure...\n");
-  generate_epsilon_closure(main_block);
-  print_nfa(main_block);
-  if (verbose) fprintf(stderr, "Compressing NFA...\n");
-  compress_nfa(main_block);
-  build_transmap(main_block);
-  if (verbose) fprintf(stderr, "Building DFA...\n");
-  build_dfa(main_block, start_state->index);
-  if (report) {
-    fprintf(report, "--------------------------------\n"
-                    "DFA structure before compression\n"
-                    "--------------------------------\n");
-  }
-  print_dfa(main_block);
-  
-  if (verbose) fprintf(stderr, "\nCompressing DFA...\n");
-  ndfa = compress_dfa(dfas, ndfa, ntokens);
-
-  if (verbose) fprintf(stderr, "\nCompressing transition tables...\n");
-  compress_transition_table(dfas, ndfa, ntokens);
-
-  if (report) {
-    fprintf(report, "-------------------------------\n"
-                    "DFA structure after compression\n"
-                    "-------------------------------\n");
-  }
-  if (verbose) fprintf(stderr, "Writing outputs...\n");
-  print_dfa(main_block);
-
-  if (had_ambiguous_result) {
-    fprintf(stderr, "No output written, there were ambiguous exit values for accepting states\n");
-    exit(2);
-  }
-  
-  print_exitval_table(main_block);
-  print_compressed_tables(main_block);
-#if 0
-  print_uncompressed_tables(main_block);
-#endif
-
-  if (report) {
-    fclose(report);
-    report = NULL;
-  }
-  
-  return result;
-}
diff -urN jbofihe-0.36/n2d/n2d.h jbofihe-0.37/n2d/n2d.h
--- jbofihe-0.36/n2d/n2d.h	Mon Mar 26 22:03:04 2001
+++ jbofihe-0.37/n2d/n2d.h	Thu Jan  1 01:00:00 1970
@@ -1,184 +0,0 @@
-/***************************************
-  $Header: /cvs/src/jbofihe/n2d/n2d.h,v 1.11 2001/03/18 22:19:53 richard Exp $
-
-  Header file for NFA->DFA conversion utility.
-  ***************************************/
-
-/* Copyright (C) Richard P. Curnow  2000-2001 */
-/*
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- * 
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- * 
-*/
-
-#ifndef N2D_H
-#define N2D_H
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#define new(T) ((T *) malloc(sizeof(T)))
-#define new_array(T,N) ((T *) malloc((N) * sizeof(T)))
-#define resize_array(T,arr,newN) ((T *) ((arr) ? realloc(arr,(newN)*sizeof(T)) : malloc((newN)*sizeof(T))))
-#define new_string(s) strcpy((char *)malloc((strlen(s)+1)*sizeof(char)),s)
-
-/* For typecasting, especially useful for declarations of local ptrs to args
-   of a qsort comparison fn */
-#define Castdecl(x, T, nx) T nx = (T) x
-
-#define Castderef(x, T, nx) T nx = *(T*) x
-
-/* Globally visible options to control reporting */
-extern FILE *report;
-extern int verbose;
-
-struct State;
-struct Block;
-
-typedef struct Translist {
-  struct Translist *next;
-  int token;
-  char *ds_name;
-  struct State *ds_ref;
-} Translist;
-
-typedef struct Stringlist {
-  struct Stringlist *next;
-  char *string;
-} Stringlist;
-  
-typedef struct State {
-  char *name;
-  int index; /* Array index in containing block */
-  struct Block *parent;
-  Translist *transitions;
-  Stringlist *exitvals;
-
-  /* Pointers to the nodes in the 'transitions' list, sorted into canonical order */
-  Translist **ordered_trans;
-  int n_transitions;
-
-  unsigned char removed; /* Flag indicating state has been pruned by compression stage */
-} State;
-
-typedef struct S_Stateset {
-  State **states;
-  int nstates;
-  int maxstates;
-} Stateset;
-
-#define HASH_BUCKETS 64
-#define HASH_MASK (HASH_BUCKETS-1)
-
-typedef struct Block {
-  char *name;
-
-  /* The master table of states within this block.  This has to be in a flat
-     array because we have to work with respect to state indices when doing the
-     2D bitmap stuff for the subset construction. */
-  State **states;
-  int nstates;
-  int maxstates;
-  
-  /* Hash table for getting rapid access to a state within the block, given
-     its name */
-  Stateset state_hash[HASH_BUCKETS];
-  
-  int subcount; /* Number for generating substates */
-} Block;
-
-typedef struct {
-  unsigned long *nfas;
-  unsigned long signature; /* All the longwords in the nfas array xor'ed together */
-  int index; /* Entry's own index in the array */
-  int *map; /* index by token code */
-  Stringlist *nfa_sl; /* NFA exit values */
-  char *result; /* Result token, computed by boolean expressions defined in input text */
-
-  /* Fields calculated in compdfa.c */
-  
-  /* The equivalence class the state is in. */
-  int eq_class;
-
-  /* Temp. storage for the new eq. class within a single pass of the splitting alg. */
-  int new_eq_class; 
-
-  /* Signature field from above is also re-used. */
-
-  int is_rep; /* Set if state is chosen as the representative of its equivalence class. */
-  int new_index; /* New index assigned to the state. */
-
-  /* Fields calculated in tabcompr.c */
-  
-  unsigned long transition_sig;
-
-  /* Default state, i.e. the one that supplies transitions for tokens not explicitly listed for this one. */
-  int defstate; 
-  /* Number of transitions that this state has different to those in the default state. */
-  int best_diff; 
-  
-
-
-} DFANode;
-
-
-/* Constants for 'create' args */  
-#define USE_OLD_MUST_EXIST 0
-#define CREATE_MUST_NOT_EXIST 1
-#define CREATE_OR_USE_OLD 2
-
-State *get_curstate(void);
-
-struct Abbrev;
-extern struct Abbrev * create_abbrev(char *name);
-extern void add_tok_to_abbrev(struct Abbrev *abbrev, char *tok);
-
-int lookup_token(char *name, int create);
-Block *lookup_block(char *name, int create);
-State *lookup_state(Block *in_block, char *name, int create);
-Stringlist * add_token(Stringlist *existing, char *token);
-void add_transitions(State *curstate, Stringlist *tokens, char *destination);
-State * add_transitions_to_internal(Block *curblock, State *addtostate, Stringlist *tokens);
-void add_exit_value(State *curstate, char *value);
-void instantiate_block(Block *curblock, char *block_name, char *instance_name);
-void fixup_state_refs(Block *b);
-
-void compress_nfa(Block *b);
-
-typedef struct Expr Expr;
-Expr * new_wild_expr(void);
-Expr * new_not_expr(Expr *c);
-Expr * new_and_expr(Expr *c1, Expr *c2);
-Expr * new_or_expr(Expr *c1, Expr *c2);
-Expr * new_xor_expr(Expr *c1, Expr *c2);
-Expr * new_cond_expr(Expr *c1, Expr *c2, Expr *c3);
-Expr * new_sym_expr(char *sym_name);
-void define_symbol(char *name, Expr *e);
-void define_result(char *string, Expr *e);
-void define_symresult(char *string, Expr *e);
-void define_defresult(char *string);
-void clear_symbol_values(void);
-void set_symbol_value(char *sym_name);
-int evaluate_result(char **);
-
-void compress_transition_table(DFANode **dfas, int ndfas, int ntokens);
-unsigned long increment(unsigned long x, int field);
-unsigned long count_bits_set(unsigned long x);
-
-/* Return new number of DFA states */
-int compress_dfa(DFANode **dfas, int ndfas, int ntokens);
-
-#endif /* N2D_H */
-
diff -urN jbofihe-0.36/n2d/parse.y jbofihe-0.37/n2d/parse.y
--- jbofihe-0.36/n2d/parse.y	Mon Mar 26 22:03:04 2001
+++ jbofihe-0.37/n2d/parse.y	Thu Jan  1 01:00:00 1970
@@ -1,145 +0,0 @@
-/**********************************************************************
-  $Header: /cvs/src/jbofihe/n2d/parse.y,v 1.7 2001/03/18 21:48:11 richard Exp $
-
-  Grammar definition for input files defining an NFA
-
- *********************************************************************/
-
-/* Copyright (C) Richard P. Curnow  2000-2001 */
-/*
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- * 
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- * 
-*/
-
-%{
-#include "n2d.h"
-
-static Block *curblock = NULL; /* Current block being built */
-static State *curstate = NULL; /* Current state being worked on */
-static State *addtostate = NULL; /* Current state (incl ext) to which transitions are added */
-static struct Abbrev *curabbrev = NULL; /* Current definition being worked on */
-static Stringlist *curtranslist = NULL; /* Transition list prior to ARROW */
-
-/* Prefix set by prefix command */
-char *prefix = NULL;
-
-State *get_curstate(void) { return curstate; }
-
-%}
-
-%union {
-    char *s;
-    int i;
-    Stringlist *sl;
-    Expr *e;
-}
-
-%token STRING STATE TOKENS PREFIX ARROW BLOCK ENDBLOCK COLON EQUAL SEMICOLON COMMA
-%token ABBREV DEFINE 
-%type<s> STRING option
-%type<sl> option_seq transition_seq
-%type<e> expr
-
-%token RESULT SYMBOL SYMRESULT DEFRESULT
-%token STAR
-%right QUERY COLON
-%left PIPE
-%left XOR
-%left AND
-%left NOT
-%left LPAREN RPAREN
-
-%%
-
-all : decl_seq ;
-
-decl_seq : /* empty */ | decl_seq decl ;
-
-decl : block_decl | tokens_decl | prefix_decl | abbrev_decl | result_decl ;
-
-/* Don't invalidate curstate at the end, this is the means of working out the
-   starting state of the NFA */
-block_decl : block1 block2 { fixup_state_refs(curblock); curblock = NULL; } ;
-
-block1 : BLOCK STRING { curblock = lookup_block($2, CREATE_MUST_NOT_EXIST); addtostate = curstate = NULL; } ;
-
-block2 : instance_decl_seq state_decl_seq ENDBLOCK ;
-
-prefix_decl : PREFIX STRING { prefix = $2; };
-
-tokens_decl : TOKENS token_seq ;
-
-abbrev_decl : ABBREV STRING { curabbrev = create_abbrev($2); }
-              EQUAL string_pipe_seq
-            ;
-
-token_seq : token_seq token | token ;
-
-string_pipe_seq : string_pipe_seq PIPE STRING { add_tok_to_abbrev(curabbrev, $3); }
-                |                      STRING { add_tok_to_abbrev(curabbrev, $1); }
-                ;
-
-token : STRING { (void) lookup_token($1, CREATE_MUST_NOT_EXIST); }
-
-instance_decl_seq : /* empty */ | instance_decl_seq instance_decl ;
-
-state_decl_seq : /* empty */ | state_decl_seq state_decl ;
-
-state_decl : STATE STRING { addtostate = curstate = lookup_state(curblock, $2, CREATE_OR_USE_OLD); } sdecl_seq ;
-
-sdecl_seq : /* empty */ | sdecl_seq sdecl ;
-
-sdecl : transition_decl ;
-
-instance_decl : STRING COLON STRING { instantiate_block(curblock, $3 /* master_block_name */, $1 /* instance_name */ ); } ;
-
-transition_decl : transition_seq ARROW { curtranslist = $1; } destination_seq { addtostate = curstate; }
-                | transition_seq EQUAL STRING { addtostate = add_transitions_to_internal(curblock, addtostate, $1);
-                                                add_exit_value(addtostate, $3);
-                                                addtostate = curstate; }
-                ;
-
-destination_seq : STRING                       { add_transitions(addtostate, curtranslist, $1); }
-                | destination_seq COMMA STRING { add_transitions(addtostate, curtranslist, $3); }
-                ;
-
-transition_seq : option_seq { $$ = $1; }
-               | transition_seq SEMICOLON option_seq { addtostate = add_transitions_to_internal(curblock, addtostate, $1); $$ = $3; }
-               ;
-
-option_seq : option { $$ = add_token(NULL, $1); }
-           | option_seq PIPE option { $$ = add_token($1, $3); } ;
-
-option : STRING 
-       | /* empty */ { $$ = NULL; }
-       ;
-
-result_decl : RESULT STRING               { define_result($2, NULL); }
-            | RESULT    expr ARROW STRING { define_result($4, $2); }
-            | SYMRESULT expr ARROW STRING { define_symresult($4, $2); }
-            | SYMBOL STRING EQUAL expr    { define_symbol($2, $4); }
-            | DEFRESULT STRING            { define_defresult($2); }
-            ;
-
-expr : NOT expr { $$ = new_not_expr($2); }
-     | expr AND expr { $$ = new_and_expr($1, $3); }
-     | expr PIPE /* OR */ expr { $$ = new_or_expr($1, $3); }
-     | expr XOR expr { $$ = new_xor_expr($1, $3); }
-     | expr QUERY expr COLON expr { $$ = new_cond_expr($1, $3, $5); }
-     | LPAREN expr RPAREN { $$ = $2; }
-     | STRING { $$ = new_sym_expr($1); }
-     | STAR { $$ = new_wild_expr(); }
-     ;
-
diff -urN jbofihe-0.36/n2d/scan.l jbofihe-0.37/n2d/scan.l
--- jbofihe-0.36/n2d/scan.l	Mon Mar 26 22:03:04 2001
+++ jbofihe-0.37/n2d/scan.l	Thu Jan  1 01:00:00 1970
@@ -1,67 +0,0 @@
-/**********************************************************************
-  $Header: /cvs/src/jbofihe/n2d/scan.l,v 1.7 2001/03/18 21:48:11 richard Exp $
-
-  Lexical analyser definition for input files defining an NFA
-
- *********************************************************************/
-
-/* Copyright (C) Richard P. Curnow  2000-2001 */
-/*
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- * 
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- * 
-*/
-
-%{
-#include "n2d.h"
-#include "parse.h"
-
-int lineno = 1;
-%}
-
-%%
-
-STATE|State|state           { return STATE; }
-ABBREV|Abbrev|abbrev        { return ABBREV; }
-DEFINE|Define|define        { return DEFINE; }
-TOKENS|Tokens|tokens        { return TOKENS; }
-PREFIX|Prefix|prefix        { return PREFIX; }
-BLOCK|Block|block           { return BLOCK; }
-ENDBLOCK|EndBlock           { return ENDBLOCK; }
-Endblock|endblock           { return ENDBLOCK; }
-RESULT|Result|result        { return RESULT; }
-DEFRESULT|DefResult         { return DEFRESULT; }
-Defresult|defresult         { return DEFRESULT; }
-SYMBOL|Symbol|symbol        { return SYMBOL; }
-SYMRESULT|SymResult         { return SYMRESULT; }
-Symresult|symresult         { return SYMRESULT; }
-[A-Za-z0-9_.]+              { yylval.s = new_string(yytext); return STRING; }
-\#.*$                       { /* strip comments */ }
-\-\>                        { return ARROW; }
-=                           { return EQUAL; }
-\|                          { return PIPE; /* OR */ }
-\&                          { return AND; }
-\~                          { return NOT; }
-\!                          { return NOT; }
-\^                          { return XOR; }
-\*                          { return STAR; }
-\?                          { return QUERY; }
-\:                          { return COLON; }
-\;                          { return SEMICOLON; }
-\(                          { return LPAREN; }
-\)                          { return RPAREN; }
-\,                          { return COMMA; }
-\n                          { lineno++; }
-[ \t]+                      { /* ignore */ }
-
diff -urN jbofihe-0.36/n2d/tabcompr.c jbofihe-0.37/n2d/tabcompr.c
--- jbofihe-0.36/n2d/tabcompr.c	Mon Mar 26 22:03:04 2001
+++ jbofihe-0.37/n2d/tabcompr.c	Thu Jan  1 01:00:00 1970
@@ -1,181 +0,0 @@
-/***************************************
-  $Header: /cvs/src/jbofihe/n2d/tabcompr.c,v 1.3 2001/03/18 21:48:11 richard Exp $
-
-  Routines to compress the DFA transition tables, by identifying where two DFA
-  states have a lot of transitions the same.
-  ***************************************/
-
-/* Copyright (C) Richard P. Curnow  2001 */
-/*
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- * 
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- * 
-*/
-
-#include "n2d.h"
-
-/* ================================================================= */
-/* Treat 'x' as a set of 16 bit pairs, with field (0..15) specifying
-   which.  Increment the field'th bit pair as a gray code, in the
-   pattern 00->01->11->10->00 */
-
-unsigned long increment(unsigned long x, int field)
-{
-  int f2 = field + field;
-  static unsigned char transxor[4] = {1, 2, 2, 1};
-  unsigned long g = x >> f2;
-  unsigned long h = transxor[g&3];
-  return x ^ (h<<f2);
-}
-
-/* ================================================================= */
-/* Calculate the number of bits set in an unsigned long. */
-
-unsigned long count_bits_set(unsigned long x)
-{
-  unsigned long y = x;
-  unsigned long c;
-  c = 0x55555555UL;
-  y = ((y>>1) & c) + (y & c);
-  c = 0x33333333UL;
-  y = ((y>>2) & c) + (y & c);
-  y = (y>>4) + y;
-  c = 0x0f0f0f0fUL;
-  y &= c;
-  y = (y>>8) + y;
-  y = (y>>16) + y;
-  return y & 0x1f;
-}
-
-/* ================================================================= */
-/* Compute 'signatures' of the transitions out of a particular state.
-   The signature is given by considering the destination state numbers mod 16,
-   and counting how many transitions there are in each resulting equivalence
-   class.  The number is encoded using the gray code implied by the increment
-   fn. */
-
-static void
-compute_transition_sigs(DFANode **dfas, int ndfas, int ntokens)
-{
-  int i, j;
-  for (i=0; i<ndfas; i++) {
-    unsigned long ts = 0UL; /* transition signature */
-    for (j=0; j<ntokens; j++) {
-      unsigned long dest = dfas[i]->map[j];
-      dest &= 0xf; /* 16 bit pairs in 'ts' */
-      ts = increment(ts, dest);
-    }
-    dfas[i]->transition_sig = ts;
-  }
-}
-
-
-/* ================================================================= */
-
-#define REQUIRED_BENEFIT 2
-
-static void
-find_default_states(DFANode **dfas, int ndfas, int ntokens)
-{
-  int i, j, t;
-  int best_index;
-  int best_diff;
-  int trans_count; /* Number of transitions in working state */
-  unsigned long tsi;
-
-  for (i=0; i<ndfas; i++) {
-    trans_count = 0;
-    for (t=0; t<ntokens; t++) {
-      if (dfas[i]->map[t] >= 0) trans_count++;
-    }
-  
-    dfas[i]->defstate = -1; /* not defaulted */
-    best_index = -1;
-    best_diff = ntokens + 1; /* Worse than any computed value */
-    tsi = dfas[i]->transition_sig;
-    for (j=0; j<i; j++) {
-      unsigned long tsj;
-      unsigned long sigdiff;
-      int diffsize;
-
-      if (dfas[j]->defstate >= 0) continue; /* Avoid chains of defstates */
-      tsj = dfas[j]->transition_sig;
-
-      /* This is the heart of the technique : if we xor two vectors of bit
-         pairs encoded with the gray code above, and count the number of bits
-         set in the result, we get the sum of absolute differences of the bit
-         pairs.   The number of outgoing transitions that differ between the
-         states must be _at_least_ this value.  It may in fact be much greater
-         (i.e. we may get 'false matches').  However, this algorithm is a quick
-         way of filtering most of the useless potential default states out. */
-      
-      sigdiff = tsi ^ tsj;
-      diffsize = count_bits_set(sigdiff);
-      if (diffsize >= best_diff) continue;
-      if (diffsize >= trans_count) continue; /* Else pointless! */
-
-      /* Otherwise, do an exact check (i.e. see how much false matching we
-         suffered). */
-      diffsize = 0;
-      for (t=0; t<ntokens; t++) {
-        if (dfas[i]->map[t] != dfas[j]->map[t]) {
-          diffsize++;
-        }
-      }
-
-      if (((best_index < 0) || (diffsize < best_diff))
-          &&
-          (diffsize < (trans_count - REQUIRED_BENEFIT))) {
-        best_index = j;
-        best_diff = diffsize;
-      }
-    }
-
-    dfas[i]->defstate = best_index;
-    dfas[i]->best_diff = best_diff;
-  }
-}
-
-/* ================================================================= */
-
-void
-compress_transition_table(DFANode **dfas, int ndfas, int ntokens)
-{
-  compute_transition_sigs(dfas, ndfas, ntokens);
-  find_default_states(dfas, ndfas, ntokens);
-}
-
-/* ================================================================= */
-
-#ifdef TEST
-int main () {
-  unsigned long x = 0;
-  unsigned long x1, x2, x3, x4;
-  x1 = increment(x,  2);
-  x2 = increment(x1, 2);
-  x3 = increment(x2, 2);
-  x4 = increment(x3, 2);
-  printf("%d %d %d %d %d\n", x, x1, x2, x3, x4);
-
-  printf("1=%d\n", count_bits_set(0x00000001));
-  printf("2=%d\n", count_bits_set(0x00000003));
-  printf("3=%d\n", count_bits_set(0x00000007));
-  printf("4=%d\n", count_bits_set(0x0000000f));
-  printf("4=%d\n", count_bits_set(0xf0000000));
-  
-  return 0;
-}
-#endif
-
-  
diff -urN jbofihe-0.36/nodes.h jbofihe-0.37/nodes.h
--- jbofihe-0.36/nodes.h	Mon Mar 26 22:03:06 2001
+++ jbofihe-0.37/nodes.h	Wed Aug  8 22:41:48 2001
@@ -1,5 +1,5 @@
 /***************************************
-  $Header: /cvs/src/jbofihe/nodes.h,v 1.13 2001/03/09 22:18:56 richard Exp $
+  $Header: /cvs/src/jbofihe/nodes.h,v 1.14 2001/05/30 20:48:30 richard Exp $
 
   Node type definitions for use in the bison parser and its interface
   with the lexer / preprocessor.
@@ -211,7 +211,10 @@
 } XTT_Brivla;
 
 typedef struct {
+  /* The tag (BAI, tense, etc.) that does the modifying */
   struct treenode *tag;
+  /* The tanru_unit_2 that is modified. */
+  struct treenode *inner_tu2;
 } XTT_JaiTag;
 
 typedef struct {
diff -urN jbofihe-0.36/output.c jbofihe-0.37/output.c
--- jbofihe-0.36/output.c	Mon Mar 26 22:03:07 2001
+++ jbofihe-0.37/output.c	Wed Aug  8 22:41:48 2001
@@ -1,5 +1,5 @@
 /***************************************
-  $Header: /cvs/src/jbofihe/output.c,v 1.27 2001/03/11 22:12:01 richard Exp $
+  $Header: /cvs/src/jbofihe/output.c,v 1.31 2001/06/22 22:16:04 richard Exp $
 
   Generate glossed output, calling the appropriate backend (latex,
   text, html etc).
@@ -47,12 +47,7 @@
 /*+ Forward prototype +*/
 static void output_internal(TreeNode *x, WhatToShow what);
 
-/*++++++++++++++++++++++++++++++
-  
-  ++++++++++++++++++++++++++++++*/
-
-static void
-add_bracketing_internal(TreeNode *x, int *seq)
+static void add_bracketing_internal(TreeNode *x, int *seq)/*{{{*/
 {
   struct nonterm *y;
   int i, n;
@@ -155,6 +150,13 @@
         }
         break;
         
+      case TANRU_UNIT_2:
+        if (prop_require_brac(x, NO)) {
+          y->number = ++*seq;
+          y->brackets = BR_BRACE;
+        }
+        break;
+        
       case FREE:
         y->number = ++*seq;
         y->brackets = BR_ROUND;
@@ -185,34 +187,18 @@
   }
 
 }
-
-/*++++++++++++++++++++++++++++++
-  Go through parse tree and mark specific non-terminals with
-  bracketing type and sequence number
-  ++++++++++++++++++++++++++++++*/
-
-void
-add_bracketing_tags(TreeNode *top)
+/*}}}*/
+void add_bracketing_tags(TreeNode *top)/*{{{*/
+/* Go through parse tree and mark specific non-terminals with bracketing type
+   and sequence number */
 {
   int seq = 0;
 
   add_bracketing_internal(top, &seq);
 
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  TreeNode *x
-
-  char *loj
-
-  char *eng
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-translate_bai (TreeNode *x, char *eng)
+/*}}}*/
+static void translate_bai (TreeNode *x, char *eng)/*{{{*/
 {
   XBaiConversion *baiconv;
   XTenseCtx *xtc;
@@ -263,19 +249,8 @@
   }
 
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-
-  TreeNode *x
-
-  char *loj
-
-  char *eng
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-translate_se (TreeNode *x, char *eng)
+/*}}}*/
+static void translate_se (TreeNode *x, char *eng)/*{{{*/
 {
   char *trans;
 
@@ -290,18 +265,8 @@
     }
   }
 }
-
-/*++++++++++++++++++++++++++++++++++++++
-
-  TreeNode *x
-
-  char *loj
-
-  char *eng
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-translate_goi (TreeNode *x, char *eng)
+/*}}}*/
+static void translate_goi (TreeNode *x, char *eng)/*{{{*/
 {
   char *trans;
 
@@ -316,18 +281,8 @@
     }
   }
 }
-
-/*++++++++++++++++++++++++++++++++++++++
-
-  TreeNode *x
-
-  char *loj
-
-  char *eng
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-translate_koha (TreeNode *x, char *eng)
+/*}}}*/
+static void translate_koha (TreeNode *x, char *eng)/*{{{*/
 {
   char *trans;
   char *cmavo;
@@ -354,18 +309,8 @@
     }
   }
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  TreeNode *x
-
-  char *sofar
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-get_cmavo_text_inside_node_internal(TreeNode *x, char *sofar)
+/*}}}*/
+static void get_cmavo_text_inside_node_internal(TreeNode *x, char *sofar)/*{{{*/
 {
   struct nonterm *nt;
   int i, n;
@@ -404,36 +349,16 @@
   return;
 
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  Retrieve the lojban text for the cmavo inside a node, excluding
-  indicator stuff.
-
-  static char * get_cmavo_text_inside_node
-
-  TreeNode *x
-  ++++++++++++++++++++++++++++++++++++++*/
-
-
-static char *
-get_cmavo_text_inside_node(TreeNode *x)
+/*}}}*/
+static char * get_cmavo_text_inside_node(TreeNode *x)/*{{{*/
 {
   static char buffer[4096];
   buffer[0] = 0;
   get_cmavo_text_inside_node_internal(x, buffer);
   return buffer;
 }
-
-/*++++++++++++++++++++++++++++++++++++++
-
-  TreeNode *x
-
-  char *eng
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-translate_other_cmavo (TreeNode *x, char *eng)
+/*}}}*/
+static void translate_other_cmavo (TreeNode *x, char *eng)/*{{{*/
 {
   char *trans;
   
@@ -444,20 +369,8 @@
     eng[0] = 0;
   }
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  TreeNode *x
-
-  char *loj
-
-  char *eng
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-translate_indicator (TreeNode *x, char *loj, char *eng)
+/*}}}*/
+static void translate_indicator (TreeNode *x, char *loj, char *eng)/*{{{*/
 {
   char *trans;
   int negated;
@@ -553,20 +466,8 @@
   }
 
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  static char * translate_tense_in_context
-
-  char *text
-
-  enum tense_contexts ctx
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static char *
-translate_tense_in_context(char *text, enum tense_contexts ctx)
+/*}}}*/
+static char * translate_tense_in_context(char *text, enum tense_contexts ctx)/*{{{*/
 {
   char buffer[128];
   char *trans;
@@ -615,17 +516,8 @@
     return trans; /* tough if null */
   }
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-
-  TreeNode *x
-
-  char *eng
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-translate_tense (TreeNode *x, char *eng)
+/*}}}*/
+static void translate_tense (TreeNode *x, char *eng)/*{{{*/
 {
   XTenseCtx *ctx;
   char buffer[128], *trans;
@@ -647,18 +539,8 @@
     translate_other_cmavo(x, eng);
   }
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  Process nonterminals of type time_offset.
-
-  TreeNode *x
-
-  char *eng
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-translate_time_offset (TreeNode *x, char *loj, char *eng)
+/*}}}*/
+static void translate_time_offset (TreeNode *x, char *loj, char *eng)/*{{{*/
 {
   XTenseCtx *ctx;
   char *trans;
@@ -710,16 +592,8 @@
   }
 
 }
-
-/*++++++++++++++++++++++++++++++++++++++
-
-  TreeNode *x
-
-  char *eng
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-translate_jai (TreeNode *x, char *eng)
+/*}}}*/
+static void translate_jai (TreeNode *x, char *eng)/*{{{*/
 {
   XGlosstype *xgt;
   char buffer[128], *trans;
@@ -753,21 +627,8 @@
     translate_other_cmavo(x, eng);
   }
 }
-
-/*++++++++++++++++++++++++++++++++++++++
-  If the lojban text comes from a source that supports conversion (and may
-  require different glossing depending on context), apply the appropriate
-  adjustments to translate it.
-
-  char *loj The lojban text to translate (not derived from 'basis' since this
-  fn is used for several different node types).
-
-  TreeNode *basis The treenode on which the gloss-type and conversion tags may
-  be hanging.
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static char *
-translate_convertible(char *loj, TreeNode *basis)
+/*}}}*/
+static char * translate_convertible(char *loj, TreeNode *basis)/*{{{*/
 {
   char buffer[1024];
   char *trans;
@@ -817,7 +678,7 @@
         }
         return trans;
       } else {
-        trans = translate_unknown(loj, conv);
+        trans = translate_unknown(loj, conv, TCX_NOUN);
         if (trans) {
           return trans;
         } else {
@@ -827,13 +688,8 @@
     }
   }
 }
-
-/*++++++++++++++++++++++++++++++++++++++
-
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-translate_brivla (TreeNode *x, char *eng)
+/*}}}*/
+static void translate_brivla (TreeNode *x, char *eng)/*{{{*/
 {
   char *trans;
 
@@ -844,13 +700,8 @@
     eng[0] = 0;
   }
 }
-
-/*++++++++++++++++++++++++++++++++++++++
-
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-translate_abstraction (TreeNode *x, char *eng)
+/*}}}*/
+static void translate_abstraction (TreeNode *x, char *eng)/*{{{*/
 {
   char *trans;
   int code;
@@ -866,13 +717,8 @@
   }
 
 }
-
-/*++++++++++++++++++++++++++++++
-  
-  ++++++++++++++++++++++++++++++*/
-
-static void
-attempt_translation(char *loj, char *eng)
+/*}}}*/
+static void attempt_translation(char *loj, char *eng)/*{{{*/
 {
   char *trans;
   trans = translate(loj);
@@ -882,13 +728,8 @@
     strcpy(eng, "?");
   }
 }
-
-/*++++++++++++++++++++++++++++++
-  
-  ++++++++++++++++++++++++++++++*/
-
-static void
-get_lojban_word_and_translation (TreeNode *x, char *loj, char *eng)
+/*}}}*/
+static void get_lojban_word_and_translation (TreeNode *x, char *loj, char *eng)/*{{{*/
 {
 
   switch (x->type) {
@@ -1015,17 +856,8 @@
   }
 
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  TreeNode *x
-  ++++++++++++++++++++++++++++++++++++++*/
-
-
-static void
-output_term(TreeNode *x, WhatToShow what)
+/*}}}*/
+static void output_term(TreeNode *x, WhatToShow what)/*{{{*/
 {
   XTermTags *xtt;
   char *trans;
@@ -1044,30 +876,31 @@
       do {
         tag = &(xtt->tag);
         switch (tag->type) {
-          case TTT_BRIVLA:
+          case TTT_BRIVLA:/*{{{*/
             trans = adv_translate(tag->brivla.x->data.brivla.word, tag->pos, TCX_TAG);
             if (!trans) trans = "?";
             sprintf(tp, "%d", tag->pos);
             (drv->start_tag)();
             (drv->write_tag_text)(tag->brivla.x->data.brivla.word, tp, trans, YES);
-            break;
-          case TTT_JAITAG:
-            (drv->start_tag)();
-            output_internal(tag->jaitag.tag, SHOW_TAG_TRANS);
-            trans = adv_translate(tag->brivla.x->data.brivla.word, tag->pos, TCX_VERB);
-            sprintf(tp, "%d", tag->pos);
-            if (trans) {
-              (drv->write_tag_text) ("", "", trans, NO);
+            break;/*}}}*/
+          case TTT_JAITAG:/*{{{*/
+            {
+              char transbuf[1024];
+              (drv->start_tag)();
+              (drv->write_partial_tag_text)("jai+<tag>1: (");
+              output_internal(tag->jaitag.tag, SHOW_TAG_TRANS);
+              sprintf(transbuf, "#%d)", tag->jaitag.inner_tu2->data.nonterm.number);
+              (drv->write_partial_tag_text) (transbuf);
             }
-            break;
-          case TTT_JAI:
+            break;/*}}}*/
+          case TTT_JAI:/*{{{*/
             trans = adv_translate(tag->brivla.x->data.brivla.word, tag->pos, TCX_TAG);
             if (!trans) trans = "?";
             sprintf(tp, "%d", tag->pos);
             (drv->start_tag)();
             (drv->write_tag_text)(tag->brivla.x->data.brivla.word, tp, trans, YES);
-            break;
-          case TTT_ABSTRACTION:
+            break;/*}}}*/
+          case TTT_ABSTRACTION:/*{{{*/
             {
               int code;
               char *cmavo;
@@ -1080,8 +913,8 @@
               (drv->write_tag_text)(cmavo, tp, trans, YES);
             }
           break;
-
-          case TTT_ME:
+/*}}}*/
+          case TTT_ME:/*{{{*/
             {
               char *trans, transbuf[1024];
               (drv->start_tag)();
@@ -1090,8 +923,8 @@
               sprintf(tp, "%d..", tag->pos);
               (drv->write_tag_text)("me", tp, transbuf, YES);
             }
-          break;
-          case TTT_NUMBERMOI:
+          break;/*}}}*/
+          case TTT_NUMBERMOI:/*{{{*/
             {
               char *trans, lojbuf[128], transbuf[1024];
               int code;
@@ -1111,8 +944,8 @@
               (drv->write_tag_text)(lojbuf, "", transbuf, YES);
             }
             break;
-            
-          case TTT_GOhA:
+            /*}}}*/
+          case TTT_GOhA:/*{{{*/
             {
               int code;
               char *cmavo;
@@ -1125,8 +958,8 @@
               (drv->write_tag_text)(lojbuf, "", "", YES);
             }
             break;
-          
-          case TTT_NUhA:
+          /*}}}*/
+          case TTT_NUhA:/*{{{*/
             {
               TreeNode *mex_operator = tag->nuha.mex_operator;
               int number = mex_operator->data.nonterm.number;
@@ -1136,8 +969,8 @@
               (drv->write_tag_text)(lojbuf, "", "", YES);
             }
             break;
-
-          case TTT_ZEI:
+/*}}}*/
+          case TTT_ZEI:/*{{{*/
             {
               int number;
               char lojbuf[32];
@@ -1149,7 +982,7 @@
               (drv->write_tag_text)(lojbuf, "", trans, YES);
             }
             break;
-            
+            /*}}}*/
           default:
             break;
         }
@@ -1164,17 +997,8 @@
     output_internal(y->children[i], what);
   }
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  TreeNode *x
-  ++++++++++++++++++++++++++++++++++++++*/
-
-
-static void
-output_simple_time_offset(TreeNode *x, WhatToShow what)
+/*}}}*/
+static void output_simple_time_offset(TreeNode *x, WhatToShow what)/*{{{*/
 {
   char loj[1024], eng[1024];
   int i, n;
@@ -1215,18 +1039,8 @@
       break;
   }
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  Special output processing for <sumti_tail>.  See if there's a
-  <sumti_6> at the start.  If so, do a special gloss to turn it into a
-  genetive form.
-
-  TreeNode *x
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-output_sumti_tail(TreeNode *x, WhatToShow what)
+/*}}}*/
+static void output_sumti_tail(TreeNode *x, WhatToShow what)/*{{{*/
 {
   int n, i;
   struct nonterm *y;
@@ -1266,20 +1080,8 @@
     }
   }
 }      
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  Handle the various types of jek, ek and gihek non-terminal.  In
-  connect.c, we have worked out the truth function being expressed and
-  stored it on a property.
-
-  TreeNode *x
-
-  WhatToShow what
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-output_fore_or_afterthought(TreeNode *x, WhatToShow what)
+/*}}}*/
+static void output_fore_or_afterthought(TreeNode *x, WhatToShow what)/*{{{*/
 {
   XConnective *xcon;
   struct nonterm *y;
@@ -1332,6 +1134,8 @@
           }
         }
 
+        break;
+
       case CNP_GE_STAG:
         /* This is what I think negations on this construct mean ... I
            think it translates as scalar negations of the two phrases.
@@ -1345,7 +1149,8 @@
             (drv->translation)("something other than");
           }
         }
-        
+
+        break;
 
       case CNP_GE_JOIK:
         break; /* Don't put anything here */
@@ -1371,7 +1176,6 @@
 
         break;
 
-
       case CNP_GI_JOIK:
         /* Output the joik as though it occurred at the position of
            the gik in the middle of the sentence. */
@@ -1388,25 +1192,15 @@
         } else {
           output_internal(xcon->js, SHOW_ENGLISH);
         }
+        
+        break;
+
     }
   }
 
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  For certain types of non-terminal, gather together the constituent
-  children as a concatenated string.  Try to look this up in the
-  dictionary.  If it works output that translation, else drill down as
-  normal.
-
-  TreeNode *x
-
-  WhatToShow what
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-output_clustered(TreeNode *x, WhatToShow what)
+/*}}}*/
+static void output_clustered(TreeNode *x, WhatToShow what)/*{{{*/
 {
   char *cluster, *trans;
   char localtrans[256];
@@ -1496,18 +1290,13 @@
     }
   }
 }
-
-/*++++++++++++++++++++++++++++++
-  
-  ++++++++++++++++++++++++++++++*/
-
-static void
-output_internal(TreeNode *x, WhatToShow what)
+/*}}}*/
+static void output_internal(TreeNode *x, WhatToShow what)/*{{{*/
 {
   char loj[1024], eng[1024];
   int i, n;
 
-  if (x->type == N_NONTERM) {
+  if (x->type == N_NONTERM) {/*{{{*/
     struct nonterm *y;
     y = &x->data.nonterm;
 
@@ -1515,7 +1304,7 @@
       (drv->open_bracket)(y->brackets, y->number);
     }
 
-    if (y->type == SELBRI_3 &&
+    if (y->type == SELBRI_3 &&/*{{{*/
         y->nchildren == 2) {
       /* Special handling */
 
@@ -1530,12 +1319,12 @@
       }
           
       output_internal(y->children[1], what);
-
-    } else if (y->type == TERM) {
+/*}}}*/
+    } else if (y->type == TERM) {/*{{{*/
 
       output_term(x, what);
-      
-    } else if (y->type == NO_CU_SENTENCE) {
+      /*}}}*/
+    } else if (y->type == NO_CU_SENTENCE) {/*{{{*/
       /* Special handling */
 
       if (y->nchildren == 2) { /* not in 'insert elidables' mode */
@@ -1555,8 +1344,8 @@
           output_internal(y->children[i], what);
         }
       }
-
-    } else if (y->type == OBSERVATIVE_SENTENCE) {
+/*}}}*/
+    } else if (y->type == OBSERVATIVE_SENTENCE) {/*{{{*/
       /* Special handling */
 
 #if 0
@@ -1566,22 +1355,22 @@
       (drv->translation)("(there is)");
 #endif
       output_internal(y->children[0], what);
-
-    } else if (y->type == TIME_OFFSET) {
+/*}}}*/
+    } else if (y->type == TIME_OFFSET) {/*{{{*/
 
       output_simple_time_offset(x, what);
-
-    } else if ((y->type == SPACE_INT_PROP) ||
+/*}}}*/
+    } else if ((y->type == SPACE_INT_PROP) ||/*{{{*/
                (y->type == INTERVAL_PROPERTY) ||
                (y->type == NUMBER_MOI_TU2)) {
       
       output_clustered(x, what);
-
-    } else if (y->type == SUMTI_TAIL) {
+/*}}}*/
+    } else if (y->type == SUMTI_TAIL) {/*{{{*/
 
       output_sumti_tail(x, what);
-
-    } else if (((y->type == SUMTI_5A) && (y->nchildren ==2)) ||
+/*}}}*/
+    } else if (((y->type == SUMTI_5A) && (y->nchildren ==2)) ||/*{{{*/
                ((y->type == SUMTI_TAIL_1) &&
                 (y->children[0]->data.nonterm.type == QUANTIFIER) &&
                 (y->children[1]->data.nonterm.type == SUMTI))) {
@@ -1598,8 +1387,8 @@
       for (i=1; i<n; i++) {
         output_internal(y->children[i], what);
       }
-
-    } else if ((y->type == JEK) ||
+/*}}}*/
+    } else if ((y->type == JEK) ||/*{{{*/
                (y->type == JEK_OPT_KE) ||
                (y->type == JEK_OPT_KEBO) ||
                (y->type == EK) ||
@@ -1609,21 +1398,21 @@
                (y->type == GUHEK)) {
       
       output_fore_or_afterthought(x, what);
-
-    } else {
+/*}}}*/
+    } else {/*{{{*/
 
       n = y->nchildren;
       for (i=0; i<n; i++) {
         output_internal(y->children[i], what);
       }
-      
+      /*}}}*/
     }
 
     if (what == SHOW_BOTH || what == SHOW_LOJBAN || what == SHOW_LOJBAN_AND_INDICATORS) {
       (drv->close_bracket)(y->brackets, y->number);
     }
-
-  } else if (x->type == N_ZEI) {
+/*}}}*/
+  } else if (x->type == N_ZEI) {/*{{{*/
 
     if (what == SHOW_BOTH || what == SHOW_LOJBAN || what == SHOW_LOJBAN_AND_INDICATORS) {
       (drv->open_bracket)(x->data.zei.brackets, x->data.zei.number);
@@ -1664,8 +1453,8 @@
     if (what == SHOW_BOTH || what == SHOW_LOJBAN || what == SHOW_LOJBAN_AND_INDICATORS) {
       (drv->close_bracket)(x->data.zei.brackets, x->data.zei.number);
     }
-
-  } else {
+/*}}}*/
+  } else {/*{{{*/
     /* Terminal token */
     char lojbuf[1024];
 
@@ -1726,15 +1515,10 @@
 
 
   }
-
+/*}}}*/
 }
-
-/*++++++++++++++++++++++++++++++
-  
-  ++++++++++++++++++++++++++++++*/
-
-void
-do_output(TreeNode *top, DriverVector *driver)
+/*}}}*/
+void do_output(TreeNode *top, DriverVector *driver)/*{{{*/
 {
   drv = driver;
 
@@ -1745,3 +1529,4 @@
 
   drv->epilogue();
 }
+/*}}}*/
diff -urN jbofihe-0.36/output.h jbofihe-0.37/output.h
--- jbofihe-0.36/output.h	Mon Mar 26 22:03:07 2001
+++ jbofihe-0.37/output.h	Wed Aug  8 22:41:48 2001
@@ -1,5 +1,5 @@
 /***************************************
-  $Header: /cvs/src/jbofihe/output.h,v 1.1 1999/06/12 08:32:03 richard Exp $
+  $Header: /cvs/src/jbofihe/output.h,v 1.2 2001/05/28 21:30:15 richard Exp $
 
   Header file shared between generic output code and the backend
   drivers.
@@ -41,6 +41,7 @@
   void (*end_tags)(void);
   void (*start_tag)(void);
   void (*write_tag_text)(char *, char *, char *, int);
+  void (*write_partial_tag_text)(char *);
 } DriverVector;
 
 #endif /* OUTPUT_H */
diff -urN jbofihe-0.36/patterns jbofihe-0.37/patterns
--- jbofihe-0.36/patterns	Mon Mar 26 22:03:07 2001
+++ jbofihe-0.37/patterns	Wed Aug  8 22:41:48 2001
@@ -1,4 +1,4 @@
-# $Id: patterns,v 1.4 2000/11/03 23:17:15 richard Exp $
+# $Id: patterns,v 1.5 2001/05/28 21:40:42 richard Exp $
 #
 # Pattern matching dictionary
 
@@ -124,6 +124,30 @@
 *4nu+4:@3
 *4nu+5:@4
 *4nu+6:@5
+*4mu'e+1:D;achievement* of %1v
+*4mu'e+2:@1
+*4mu'e+3:@2
+*4mu'e+4:@3
+*4mu'e+5:@4
+*4mu'e+6:@5
+*4pu'u+1:D;process* of %1v
+*4pu'u+2:@1
+*4pu'u+3:@2
+*4pu'u+4:@3
+*4pu'u+5:@4
+*4pu'u+6:@5
+*4za'i+1:D;state* of %1v
+*4za'i+2:@1
+*4za'i+3:@2
+*4za'i+4:@3
+*4za'i+5:@4
+*4za'i+6:@5
+*4zu'o+1:D;activity* of %1v
+*4zu'o+2:@1
+*4zu'o+3:@2
+*4zu'o+4:@3
+*4zu'o+5:@4
+*4zu'o+6:@5
 *4ka+1:D;quality* of %1v
 *4ka+2:@1
 *4ka+3:@2
diff -urN jbofihe-0.36/rpc2x.y jbofihe-0.37/rpc2x.y
--- jbofihe-0.36/rpc2x.y	Mon Mar 26 22:03:08 2001
+++ jbofihe-0.37/rpc2x.y	Wed Aug  8 22:41:50 2001
@@ -1,5 +1,5 @@
 /** -*-Fundamental-*- *************************************
-  $Header: /cvs/src/jbofihe/rpc2x.y,v 1.19 2001/01/28 21:47:10 richard Exp $
+  $Header: /cvs/src/jbofihe/rpc2x.y,v 1.20 2001/05/09 22:06:55 richard Exp $
 
   Bison parser generator input for Lojban grammar.
 
@@ -203,6 +203,8 @@
 %token PRIVATE_NAhE_space
 %token PRIVATE_NAhE_CAhA
 
+%token PRIVATE_NA_KU
+
 %token PRIVATE_NUMBER_MAI
 %token PRIVATE_NUMBER_MOI
 %token PRIVATE_NUMBER_ROI
@@ -842,8 +844,8 @@
                     | tag /* ET KU */
                     ;
 
-term_floating_negate : NA KU free_seq
-                     | NA KU
+term_floating_negate : PRIVATE_NA_KU NA KU free_seq
+                     | PRIVATE_NA_KU NA KU
                      ;
 
 /* Where on earth do these arise?  'FA' on its own can be in a fragment as an answer to a fa'i question,
diff -urN jbofihe-0.36/terms.c jbofihe-0.37/terms.c
--- jbofihe-0.36/terms.c	Mon Mar 26 22:03:09 2001
+++ jbofihe-0.37/terms.c	Wed Aug  8 22:41:51 2001
@@ -1,5 +1,5 @@
 /***************************************
-  $Header: /cvs/src/jbofihe/terms.c,v 1.21 2001/03/09 22:18:56 richard Exp $
+  $Header: /cvs/src/jbofihe/terms.c,v 1.23 2001/05/30 20:48:31 richard Exp $
 
   Processing to work out which x-place of a particular selbri any term
   in the text occupies.
@@ -23,6 +23,7 @@
  * 
  *********************************************************************/
 
+/*{{{ #Includes  */
 #include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -32,6 +33,7 @@
 #include "rpc_tab.h"
 #include "nodes.h"
 #include "cmavotab.h"
+/*}}}*/
 
 #define MAX_TERMS_IN_VECTOR 20
 
@@ -45,6 +47,8 @@
   pa is 101 etc +*/
 #define FAI_OFFSET 101
 
+/* Type definitions *//*{{{*/
+
 typedef enum {
   TRM_POS, /* positional, i.e. just bare sumti */
   TRM_FA,  /* a term with a FA in front */
@@ -81,8 +85,10 @@
   union {
     TermVector *links;
     SeInfo se;
-    TreeNode *jai_tag; /* The treenode for the tag in a JAI <tag>
-                          construction */
+    struct {
+      TreeNode *tag; /* The tag */
+      TreeNode *inner_tu2; /* The modified tanru_unit_2 */
+    } jai_tag;
   } data;
 } LinkConvEntry;
 
@@ -93,35 +99,19 @@
   int n;
   LinkConvEntry e[MAX_TERMS_IN_VECTOR];
 } LinkConv;
+/*}}}*/
+/* Forward prototypes *//*{{{*/
+static void process_bridi_tail(TreeNode *bt, TermVector *pre, TermVector *post);
+static void process_selbri_args(TreeNode *s, TermVector *pre, TermVector *post, LinkConv *lc);
+static void process_selbri_3_args(TreeNode *s3, TermVector *pre, TermVector *post, LinkConv *lc);
+/*}}}*/
 
-/*+ Forward prototype +*/
-static void
-process_bridi_tail(TreeNode *bt, TermVector *pre, TermVector *post);
-
-/*+ Forward prototype +*/
-static void
-process_selbri_args(TreeNode *s, TermVector *pre, TermVector *post, LinkConv *lc);
-
-static void
-process_selbri_3_args(TreeNode *s3, TermVector *pre, TermVector *post, LinkConv *lc);
-
-
-/*++++++++++++++++++++++++++++++
-  Initialise a term vector to empty
-  ++++++++++++++++++++++++++++++*/
-
-static void
-tv_init(TermVector *tv)
+static void tv_init(TermVector *tv)/*{{{*/
 {
   tv->n_nodes = 0;
 }
-
-/*++++++++++++++++++++++++++++++
-  Concatenate two vectors together, r = s1 ++ s2
-  ++++++++++++++++++++++++++++++*/
-
-static void
-tv_catenate(TermVector *s1, TermVector *s2, TermVector *r)
+/*}}}*/
+static void tv_catenate(TermVector *s1, TermVector *s2, TermVector *r)/*{{{*/
 {
   int tn;
   int i, n1, n2;
@@ -145,14 +135,10 @@
     r->nodes[n1+i] = s2->nodes[i];
   }
 }
-
-/*++++++++++++++++++++++++++++++
-  Reverse the order of a Termector.
-  ++++++++++++++++++++++++++++++*/
-
-static void
-tv_reverse(TermVector *dest, TermVector *src)
+/*}}}*/
+static void tv_reverse(TermVector *dest, TermVector *src)/*{{{*/
 {
+  /* Reverse the order of a TermVector. */
   int n, i;
 
   assert(dest != src); /* Not designed to cope with this case */
@@ -162,33 +148,16 @@
   for (i=0; i<n; i++) {
     dest->nodes[i] = src->nodes[n-1-i];
   }
-}
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  Initialise a linkconv list to empty
-
-  LinkConv *lc
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-lc_init(LinkConv *lc)
+}/*}}}*/
+static void lc_init(LinkConv *lc)/*{{{*/
 {
+  /* Initialise a linkconv list to empty */
   lc->n = 0;
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  Append a SE conversion to a linkconv chain
-
-  LinkConv *lc The vector to append to
-
-  int conv The conversion to append
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-lc_append_se(LinkConv *lc, int conv, TreeNode *senode)
+/*}}}*/
+static void lc_append_se(LinkConv *lc, int conv, TreeNode *senode)/*{{{*/
 {
+  /* Append a SE conversion to a linkconv chain */
   assert(lc->n < MAX_TERMS_IN_VECTOR);
 
   lc->e[lc->n].data.se.conv = conv;
@@ -196,37 +165,19 @@
   lc->e[lc->n].type = LC_SE;
   ++(lc->n);
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  LinkConv *lc
-
-  TreeNode *tag
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-lc_append_jai_tag(LinkConv *lc, TreeNode *tag)
+/*}}}*/
+static void lc_append_jai_tag(LinkConv *lc, TreeNode *tag, TreeNode *inner_tu2)/*{{{*/
 {
   assert(lc->n < MAX_TERMS_IN_VECTOR);
 
-  lc->e[lc->n].data.jai_tag = tag;
+  lc->e[lc->n].data.jai_tag.tag = tag;
+  lc->e[lc->n].data.jai_tag.inner_tu2 = inner_tu2;
   lc->e[lc->n].type = LC_TAG;
   ++(lc->n);
   
 }
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  LinkConv *lc
-
-  TreeNode *tag
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-lc_append_jai(LinkConv *lc)
+/*}}}*/
+static void lc_append_jai(LinkConv *lc)/*{{{*/
 {
   assert(lc->n < MAX_TERMS_IN_VECTOR);
 
@@ -234,19 +185,11 @@
   ++(lc->n);
   
 }
-
-/*++++++++++++++++++++++++++++++++++++++
-  Append a set of linked sumti to a linkconv chain
-
-  LinkConv *lc The vector to append to
-
-  TermVector *v The vector of linked sumti.  A dynamically allocated
-  COPY is made of this argument.
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-lc_append_links(LinkConv *lc, TermVector *v)
+/*}}}*/
+static void lc_append_links(LinkConv *lc, TermVector *v)/*{{{*/
 {
+  /* Append a set of linked sumti to a linkconv chain.  Note, a dynamic COPY is
+     made of 'v'. */
   assert(lc->n < MAX_TERMS_IN_VECTOR);
   lc->e[lc->n].data.links = new(TermVector);
 
@@ -256,39 +199,20 @@
   lc->e[lc->n].type = LC_LINKS;
   ++(lc->n);
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  Copy a link/conv vector.  A semi-deep structure copy is made,
-  i.e. the pointers to the vectors of linked sumti get aliased.  The
-  intention is to produce a local copy onto which extra terms can be
-  appended, rather than to make a completely general copy.
-
-  const LinkConv *src The source for the copy
-
-  LinkConv *dest The destination of the copy.
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-lc_copy(const LinkConv *src, LinkConv *dest)
+/*}}}*/
+static void lc_copy(const LinkConv *src, LinkConv *dest)/*{{{*/
 {
-  /* Deep structure copy */
+/* Copy a link/conv vector.  A semi-deep structure copy is made, i.e. the
+  pointers to the vectors of linked sumti get aliased.  The intention is to
+  produce a local copy onto which extra terms can be appended, rather than to
+  make a completely general copy.  */
+  
   *dest = *src;
 }
-
-/*++++++++++++++++++++++++++++++++++++++
-  Turn se, te... into 2 .. 5
-
-  static int recover_se_conv Return the value in the range 2 .. 5
-  corresponding to the SE cmavo.
-
-  TreeNode *x The parse node, must be a cmavo of selma'o SE.
-
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static int
-recover_se_conv(TreeNode *x)
+/*}}}*/
+static int recover_se_conv(TreeNode *x)/*{{{*/
 {
+  /* Turn se, te etc into 2..5 */
   int se_code;
   char *se_str;
   TreeNode *se;
@@ -314,22 +238,17 @@
     abort();
   }
 }
-
-/*++++++++++++++++++++++++++++++++++++++
-  Turn fa, fe... into 1 .. 5
+/*}}}*/
+static int recover_fa_conv(TreeNode *x)/*{{{*/
+{
+/* Turn fa, fe... into 1 .. 5
 
   EXTEND TO COPE WITH SUBSCRIPTED VALUES!!!
 
   static int recover_fa_conv Return the value in the range 1 .. 5
   corresponding to the FA cmavo.
 
-  TreeNode *x The parse node, must be a cmavo of selma'o FA.
-
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static int
-recover_fa_conv(TreeNode *x)
-{
+  TreeNode *x The parse node, must be a cmavo of selma'o FA.  */
   int fa_code;
   char *fa_str;
 
@@ -360,18 +279,13 @@
     abort();
   }
 }
-
-/*++++++++++++++++++++++++++++++
-
-  Build a TermVector from a terms node in the parse tree.  Any terms_1
-  or terms_2 with CEhE or PEhE inside is ignored - to defer the
-  problem of what to do about afterthought termsets to somewhere else.
-
-  ++++++++++++++++++++++++++++++*/
-
-static void
-tv_build(TermVector *r, TreeNode *x)
+/*}}}*/
+static void tv_build(TermVector *r, TreeNode *x)/*{{{*/
 {
+/* Build a TermVector from a terms node in the parse tree.  Any terms_1
+  or terms_2 with CEhE or PEhE inside is ignored - to defer the problem of what
+  to do about afterthought termsets to somewhere else.  */
+
   TermVector vv;
   TreeNode *xx, *t1, *t2, *t, *tc, *tcc;
   struct nonterm *ntx, *ntt;
@@ -473,19 +387,8 @@
   tv_reverse(r, &vv);
 
 }
-
-/*++++++++++++++++++++++++++++++++++++++
-  Analyse a subsentence (occurring within a gek_sentence)
-  
-  TreeNode *ss The subsentence node.
-
-  TermVector *pre Terms before selbri
-
-  TermVector *post Terms after selbri
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-process_subsentence(TreeNode *ss, TermVector *pre, TermVector *post)
+/*}}}*/
+static void process_subsentence(TreeNode *ss, TermVector *pre, TermVector *post)/*{{{*/
 {
   struct nonterm *nt, *ntc;
   int nc;
@@ -537,9 +440,9 @@
     process_bridi_tail(btail, pre, post);
   }
 }
+/*}}}*/
 
-/* ================================================== */
-/* Type definitions */
+/* Type definitions (tags, places etc) *//*{{{*/
 
 typedef enum {
   PT_ORD, /* one of the x1 .. x5 of the BRIVLA etc */
@@ -549,6 +452,7 @@
 
 typedef struct {
   TreeNode *tag;
+  TreeNode *inner_tu2;
 } TagPlace;
 
 typedef struct {
@@ -563,19 +467,12 @@
   TagPlace tag;
   JaiPlace jai;
 } Place;
+/*}}}*/
 
-/*++++++++++++++++++++++++++++++++++++++
-  Given a term treenode and a single place descriptor, chain the place
-  information to the node's property list for later display.
-
-  TreeNode *x
-
-  Place pl
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-fixup_term_place(TreeNode *x, Place *pl, XTermTag *tt)
+static void fixup_term_place(TreeNode *x, Place *pl, XTermTag *tt)/*{{{*/
 {
+  /* Given a term treenode and a single place descriptor, chain the place
+     information to the node's property list for later display. */
   XTermTags *ts, *nts;
 
   type_check(x, TERM);
@@ -602,6 +499,7 @@
 
     case PT_TAG:
       ts->tag.jaitag.tag = pl->tag.tag;
+      ts->tag.jaitag.inner_tu2 = pl->tag.inner_tu2;
       ts->tag.type = TTT_JAITAG;
       break;
 
@@ -610,26 +508,8 @@
       break;
   }
   
-}
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  TermVector *t
-
-  Place *place
-
-  Place *fai
-
-  int abase The base value for terms which are in unmarked places. (2
-  for links and tail terms, 1 for head terms)
-
-  XTermTag *tt Information about the primitive tanru_unit_2, one of
-  whose places the term occupies.
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-assign_terms_to_places(TermVector *t, Place *place, Place *fai, int abase, XTermTag *tt)
+}/*}}}*/
+static void assign_terms_to_places(TermVector *t, Place *place, Place *fai, int abase, XTermTag *tt)/*{{{*/
 {
   int i, n;
   int base;
@@ -692,18 +572,8 @@
   }
 
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  LinkConv *lc
-
-  TreeNode *convertible
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-assign_conversion(LinkConv *lc, TreeNode *convertible)
+/*}}}*/
+static void assign_conversion(LinkConv *lc, TreeNode *convertible)/*{{{*/
 {
   Place place[MAX_POS];
   XConversion *ext;
@@ -760,24 +630,12 @@
   ext->conv = place[1].pos;
 
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  Work out which terms have which places in the bridi, and tag them
-  accordingly
-
-  TermVector *pre
-
-  TermVector *post
-
-  LinkConv *lc
-
-  XTermTag *tt
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-assign_places(TermVector *pre, TermVector *post, LinkConv *lc, XTermTag *tt)
+/*}}}*/
+static void assign_places(TermVector *pre, TermVector *post, LinkConv *lc, XTermTag *tt)/*{{{*/
 {
+  /* Work out which terms have which places in the bridi, and tag them
+     accordingly */
+
   /* Variable declarations */
 
   /*+ Array for the ordinary places in the bridi.  e.g. if you get a
@@ -831,7 +689,8 @@
           fai[1] = place[1];
           place[1].type = PT_TAG;
           place[1].valid = 1;
-          place[1].tag.tag = lc->e[i].data.jai_tag;
+          place[1].tag.tag = lc->e[i].data.jai_tag.tag;
+          place[1].tag.inner_tu2 = lc->e[i].data.jai_tag.inner_tu2;
         }
       break;
 
@@ -847,7 +706,7 @@
             fai[j] = fai[j-1];
           }
           fai[1] = place[1];
-          place[1].type = PT_JAI;
+          place[1].type = PT_ORD;
           place[1].valid = 1;
         }
         break;
@@ -863,40 +722,20 @@
   assign_terms_to_places(post, place, fai, 2, tt);
 
 }
-
-/*++++++++++++++++++++++++++++++++++++++
-  Handle argument processing at the level of a tanru_unit_2.  This is
-  where the clever tag assignment stuff is done!
-
-  TreeNode *tu2 The tanru_unit_1 node
-
-  TermVector *pre The vector of terms occurring before the selbri
-
-  TermVector *post The vector of terms occurring after the selbri
-
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-process_tanru_unit_2_args(TreeNode *tu2, TermVector *pre, TermVector *post, LinkConv *lc)
+/*}}}*/
+static void process_tanru_unit_2_args(TreeNode *tu2, TermVector *pre, TermVector *post, LinkConv *lc)/*{{{*/
 {
-
-  /* Need to descend until we get to something 'primitive' enough.
-     This is one of :
-     
-     BRIVLA
-
-     (other cases need adding, when I think how to do it.)
-     
-     */
+  /* Handle argument processing at the level of a tanru_unit_2.  This is
+     where the clever tag assignment stuff is done! */
 
   TreeNode *c1;
 
   type_check(tu2, TANRU_UNIT_2);
   c1 = maybe_strip_attitudinal(child_ref(tu2, 0));
 
-  if (c1->type == N_CMAVO) {
+  if (c1->type == N_CMAVO) {/*{{{*/
     switch (c1->data.cmavo.selmao) {
-      case GOhA:
+      case GOhA:/*{{{*/
         {
           XTermTag tt;
 
@@ -905,8 +744,8 @@
           assign_places(pre, post, lc, &tt);
           assign_conversion(lc, c1);
         }
-        break;
-      case ME:
+        break;/*}}}*/
+      case ME:/*{{{*/
         {
           XTermTag tt;
           XRequireBrac *xrb;
@@ -920,8 +759,8 @@
           assign_places(pre, post, lc, &tt);
           /* Conversion can't occur on ME, there is only an x1 place */
         }
-      break;
-      case NUhA:
+      break;/*}}}*/
+      case NUhA:/*{{{*/
         {
           XTermTag tt;
           XRequireBrac *xrb;
@@ -932,21 +771,21 @@
           assign_conversion(lc, c1);
           xrb = prop_require_brac (tt.nuha.mex_operator, YES);
         }
-        break;
+        break;/*}}}*/
+        
     }
-
-  } else if (c1->type == N_BRIVLA) {
+/*}}}*/
+  } else if (c1->type == N_BRIVLA) {/*{{{*/
 
     XTermTag tt;
 
     tt.type = TTT_BRIVLA;
     tt.brivla.x = c1;
     assign_places(pre, post, lc, &tt);
-    assign_conversion(lc, c1);
-
+    assign_conversion(lc, c1);/*}}}*/
   } else if (c1->type == N_NONTERM) {
     switch (c1->data.nonterm.type) {
-      case NUMBER_MOI_TU2:
+      case NUMBER_MOI_TU2:/*{{{*/
         {
           TreeNode *norl, *moi;
           XRequireBrac *xrb;
@@ -965,8 +804,8 @@
           }
         }
         break;
-
-      case KE_SELBRI3_TU2:
+/*}}}*/
+      case KE_SELBRI3_TU2:/*{{{*/
         {
           TreeNode *cs3;
 
@@ -975,9 +814,8 @@
           process_selbri_3_args(cs3, pre, post, lc);
         }
 
-        break;
-
-      case SE_TU2:
+        break;/*}}}*/
+      case SE_TU2:/*{{{*/
         {
           LinkConv newlc;
           int conv;
@@ -995,25 +833,26 @@
 
         }
 
-        break;
-
-      case JAI_TAG_TU2:
+        break;/*}}}*/
+      case JAI_TAG_TU2:/*{{{*/
         {
           LinkConv newlc;
           TreeNode *ctag, *tu2_child;
+          XRequireBrac *xrb;
 
           lc_copy(lc, &newlc);
           ctag = find_nth_child(c1, 1, TAG);
           tu2_child = find_nth_child(c1, 1, TANRU_UNIT_2);
           assert(ctag);
           assert(tu2_child);
-          lc_append_jai_tag(&newlc, ctag);
+          lc_append_jai_tag(&newlc, ctag, tu2_child);
+          xrb = prop_require_brac(tu2_child, YES);
           process_tanru_unit_2_args(tu2_child, pre, post, &newlc);
         }
 
         break;
-
-      case JAI_TU2:
+/*}}}*/
+      case JAI_TU2:/*{{{*/
         {
           LinkConv newlc;
           TreeNode *tu2_child;
@@ -1025,8 +864,8 @@
           process_tanru_unit_2_args(tu2_child, pre, post, &newlc);
         }
         break;
-
-      case NAHE_TU2:
+/*}}}*/
+      case NAHE_TU2:/*{{{*/
         {
           TreeNode *tu2_child;
           tu2_child = find_nth_child(c1, 1, TANRU_UNIT_2);
@@ -1034,8 +873,8 @@
           process_tanru_unit_2_args(tu2_child, pre, post, lc);
         }
         break;
-
-      case ABSTRACTION:
+/*}}}*/
+      case ABSTRACTION:/*{{{*/
         {
           TreeNode *nns, *c2, *nu, *nai;
           XTermTag tt;
@@ -1066,12 +905,12 @@
           }
         }
         break;
-
+/*}}}*/
       default:
         break;
 
     }
-  } else if (c1->type == N_ZEI) {
+  } else if (c1->type == N_ZEI) {/*{{{*/
 
     XTermTag tt;
 
@@ -1079,29 +918,18 @@
     tt.zei.zei = c1;
     assign_places(pre, post, lc, &tt);
     assign_conversion(lc, c1);
-    
+    /*}}}*/
   } else {
     abort();
   }
   
 
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  Handle argument processing at the level of a tanru_unit_1.
-
-  TreeNode *tu1 The tanru_unit_1 node
-
-  TermVector *pre The vector of terms occurring before the selbri
-
-  TermVector *post The vector of terms occurring after the selbri
-
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-process_tanru_unit_1_args(TreeNode *tu1, TermVector *pre, TermVector *post, LinkConv *lc)
+/*}}}*/
+static void process_tanru_unit_1_args(TreeNode *tu1, TermVector *pre, TermVector *post, LinkConv *lc)/*{{{*/
 {
+/* Handle argument processing at the level of a tanru_unit_1.  */
+
   XTermVector *xtv; /* The linked sumti vector of the tu1 if any */
   XDoneTU1 *xdtu1; /* Property whose existence on the TU1 tree node
                       shows we have processed its linked sumti */
@@ -1128,21 +956,11 @@
   xdtu1 = prop_done_tu1(tu1, YES);
 
 }
-
-/*++++++++++++++++++++++++++++++++++++++
-  Drill down into a selbri_6 etc
-
-  TreeNode *s6 The selbri_6 node
-
-  TermVector *pre The vector of terms occurring before the selbri
-
-  TermVector *post The vector of terms occurring after the selbri
-
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-process_selbri_6_args(TreeNode *s6, TermVector *pre, TermVector *post, LinkConv *lc)
+/*}}}*/
+static void process_selbri_6_args(TreeNode *s6, TermVector *pre, TermVector *post, LinkConv *lc)/*{{{*/
 {
+  /* Drill down into a selbri_6 etc */
+
   TreeNode *tu, *tu1, *cs6, *cs;
   
   /* For the cases with BO, I think it's only the very final term
@@ -1176,21 +994,11 @@
     }
   }
 }
-
-/*++++++++++++++++++++++++++++++++++++++
-  Drill down into a selbri_5 etc
-
-  TreeNode *s5 The selbri_5 node
-
-  TermVector *pre The vector of terms occurring before the selbri
-
-  TermVector *post The vector of terms occurring after the selbri
-
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-process_selbri_5_args(TreeNode *s5, TermVector *pre, TermVector *post, LinkConv *lc)
+/*}}}*/
+static void process_selbri_5_args(TreeNode *s5, TermVector *pre, TermVector *post, LinkConv *lc)/*{{{*/
 {
+  /* Drill down into a selbri_5 etc */
+
   TreeNode *s6, *cs5;
 
   cs5 = s5;
@@ -1201,24 +1009,13 @@
   } while (cs5);
 
 }
-
-/*++++++++++++++++++++++++++++++++++++++
-  Drill down into a selbri_3 to try to recover a single tanru_unit_2
-  which is the tertau.  Apply the supplied args to that - i.e. work
-  out the place structure, then go and mark all the referenced terms
-  accordingly.
-
-  TreeNode *s3 The selbri_3 node
-
-  TermVector *pre The vector of terms occurring before the selbri
-
-  TermVector *post The vector of terms occurring after the selbri
-
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-process_selbri_3_args(TreeNode *s3, TermVector *pre, TermVector *post, LinkConv *lc)
+/*}}}*/
+static void process_selbri_3_args(TreeNode *s3, TermVector *pre, TermVector *post, LinkConv *lc)/*{{{*/
 {
+  /* Drill down into a selbri_3 to try to recover a single tanru_unit_2 which is
+     the tertau.  Apply the supplied args to that - i.e. work out the place
+     structure, then go and mark all the referenced terms accordingly.  */
+
   TreeNode *s4, *s5, *ks3, *cs3; /* The selbri chain */
 
   type_check(s3, SELBRI_3);
@@ -1257,21 +1054,11 @@
   }
 
 }
-
-/*++++++++++++++++++++++++++++++++++++++
-  Drill down into a selbri etc
-
-  TreeNode *s The selbri node
-
-  TermVector *pre The vector of terms occurring before the selbri
-
-  TermVector *post The vector of terms occurring after the selbri
-
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-process_selbri_args(TreeNode *s, TermVector *pre, TermVector *post, LinkConv *lc)
+/*}}}*/
+static void process_selbri_args(TreeNode *s, TermVector *pre, TermVector *post, LinkConv *lc)/*{{{*/
 {
+  /* Drill down into a selbri etc */
+
   TreeNode *cs, *s1, *s2, *s3; /* The selbri chain */
   TermVector empty_tv;
 
@@ -1311,21 +1098,11 @@
     /* All arguments apply to the selbri_3 */
     process_selbri_3_args(s3, pre, post, lc);
   }
-}
-
-/*++++++++++++++++++++++++++++++++++++++
-  Handle each main selbri after it has been located.
-
-  TreeNode *ms A main_selbri parse tree node
-
-  TermVector *pre Vector of terms occurring before the selbri
-
-  TermVector *post Vector of terms occurring after the selbri
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-process_main_selbri(TreeNode *ms, TermVector *pre, TermVector *post)
+}/*}}}*/
+static void process_main_selbri(TreeNode *ms, TermVector *pre, TermVector *post)/*{{{*/
 {
+  /* Handle each main selbri after it has been located.  */
+
   TermVector *cpre, *cpost;
   XTermVectors *xtv;
   TreeNode *s;
@@ -1355,20 +1132,8 @@
 
   process_selbri_args(s, pre, post, &lc);
 
-}
-
-/*++++++++++++++++++++++++++++++++++++++
-  Analyse a bridi_tail_3.
-  
-  TreeNode *bt3 The bridi_tail_3 node.
-
-  TermVector *pre Terms before selbri
-
-  TermVector *post Terms after selbri
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-process_bridi_tail_3(TreeNode *bt3, TermVector *pre, TermVector *post)
+}/*}}}*/
+static void process_bridi_tail_3(TreeNode *bt3, TermVector *pre, TermVector *post)/*{{{*/
 {
   TreeNode *c1, *gsc, *tt, *ttc;
   TermVector tail_terms, new_post;
@@ -1439,20 +1204,8 @@
   }
     
   return;
-}
-
-/*++++++++++++++++++++++++++++++++++++++
-  Analyse a bridi_tail_2.
-  
-  TreeNode *bt2 The bridi_tail_2 node.
-
-  TermVector *pre Terms before selbri
-
-  TermVector *post Terms after selbri
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-process_bridi_tail_2(TreeNode *bt2, TermVector *pre, TermVector *post)
+}/*}}}*/
+static void process_bridi_tail_2(TreeNode *bt2, TermVector *pre, TermVector *post)/*{{{*/
 {
 
   TreeNode *bt3, *bt2a, *bt2b, *tt, *ttt; /* The children */
@@ -1501,22 +1254,8 @@
 
   return;
 
-}
-
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  Analyse a bridi_tail_1.
-  
-  TreeNode *bt1 The bridi_tail_1 node.
-
-  TermVector *pre Terms before selbri
-
-  TermVector *post Terms after selbri
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-process_bridi_tail_1(TreeNode *bt1, TermVector *pre, TermVector *post)
+}/*}}}*/
+static void process_bridi_tail_1(TreeNode *bt1, TermVector *pre, TermVector *post)/*{{{*/
 {
 
   TreeNode *bt2, *bt1c, *tt, *ttt; /* The children */
@@ -1565,11 +1304,10 @@
 
   return;
 
-}
-
-
-/*++++++++++++++++++++++++++++++
-
+}/*}}}*/
+static void process_bridi_tail(TreeNode *bt, TermVector *pre, TermVector *post)/*{{{*/
+{
+/*
   Analyse a bridi_tail down to each vector of tail_terms and find each
   main_selbri within it.  Ultimately tag each main_selbri with the
   vectors of pre- and post- terms that apply to it.
@@ -1583,11 +1321,8 @@
   TermVector *post The vector of tail_terms accumulated from outer
   bridi_tail constructions.  (Initially this will be empty).
 
-  ++++++++++++++++++++++++++++++*/
+  */
 
-static void
-process_bridi_tail(TreeNode *bt, TermVector *pre, TermVector *post)
-{
   TreeNode *bt1, *btc, *tt, *ttt; /* The children */
   struct nonterm *nt;
   int nc;
@@ -1632,16 +1367,8 @@
       process_bridi_tail(btc, pre, post);
     }
   }
-}
-
-/*++++++++++++++++++++++++++++++
-  
-  Deal with a STATEMENT_3 once we have found it.
-
-  ++++++++++++++++++++++++++++++*/
-
-static void
-process_statement_3(TreeNode *x)
+}/*}}}*/
+static void process_statement_3(TreeNode *x)/*{{{*/
 {
   struct nonterm *nt, *nts, *ntc;
   TreeNode *sent, *terms, *btail, *c;
@@ -1714,18 +1441,8 @@
     }
 #endif
   }
-}
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  Handle abstraction nodes
-
-  TreeNode *x
-  ++++++++++++++++++++++++++++++++++++++*/
-
-
-static void
-process_abstraction(TreeNode *x)
+}/*}}}*/
+static void process_abstraction(TreeNode *x)/*{{{*/
 {
   TreeNode *ss;
   TermVector pre, post;
@@ -1735,17 +1452,8 @@
   ss = child_ref(x, 1);
   type_check(ss, SUBSENTENCE);
   process_subsentence(ss, &pre, &post);
-}
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  TreeNode *x
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-process_relative_clause(TreeNode *x)
+}/*}}}*/
+static void process_relative_clause(TreeNode *x)/*{{{*/
 {
   TreeNode *ss, *fc;
   TermVector pre, post;
@@ -1759,20 +1467,13 @@
       process_subsentence(ss, &pre, &post);
     }
   }
-}
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  Handle metalinguistic constructions (SEI...).  This is slightly
-  different to the other cases, in that we have to resolve the terms
-  and selbri ourselves.
-
-  TreeNode *x
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-process_metalinguistic(TreeNode *x)
+}/*}}}*/
+static void process_metalinguistic(TreeNode *x)/*{{{*/
 {
+  /* Handle metalinguistic constructions (SEI...).  This is slightly
+    different to the other cases, in that we have to resolve the terms and selbri
+    ourselves.  */
+
   TermVector pre, post;
   TreeNode *terms, *mmselbri, *selbri;
   LinkConv lc;
@@ -1791,24 +1492,17 @@
   lc_init(&lc);
   process_selbri_args(selbri, &pre, &post, &lc);
 
-}
-
-/*++++++++++++++++++++++++++++++
-  Seek recursively downwards looking for treenodes of type
-  STATEMENT_3, ABSTRACTION.  We're really interested in SENTENCE, but
-  that occurs within ge subsentence->sentence gi
-  subsentence->sentence, and we have to track term strings into
-  gek_sentences because you can have terms in front plus tail terms
-  behind, that are part of both clauses.
-
-  Relative clauses and NU abstractions (the 2 other places where
-  sentences can occur) will have to be looked at afterwards.
-
-  ++++++++++++++++++++++++++++++*/
-
-static void
-scan_for_sentence_parents(TreeNode *x)
+}/*}}}*/
+static void scan_for_sentence_parents(TreeNode *x)/*{{{*/
 {
+  /* Seek recursively downwards looking for treenodes of type
+    STATEMENT_3, ABSTRACTION.  We're really interested in SENTENCE, but that
+    occurs within ge subsentence->sentence gi subsentence->sentence, and we
+    have to track term strings into gek_sentences because you can have terms in
+    front plus tail terms behind, that are part of both clauses.
+
+    Relative clauses and NU abstractions (the 2 other places where sentences
+    can occur) will have to be looked at afterwards.  */
 
   int nc, i;
   struct nonterm *nt;
@@ -1836,19 +1530,12 @@
     }
 
   }
-}
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  Scan down into the selbri inside a 'quantifier selbri' sumti, to
-  mark up the tertau for conversions.
-
-  TreeNode *x
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-process_sumti_5b(TreeNode *x)
+}/*}}}*/
+static void process_sumti_5b(TreeNode *x)/*{{{*/
 {
+  /* Scan down into the selbri inside a 'quantifier selbri' sumti, to mark up the
+     tertau for conversions.  */
+
   TreeNode *c;
   LinkConv lc;
 
@@ -1859,17 +1546,11 @@
   assert(c);
   lc_init(&lc);
   process_selbri_args(c, &empty_tv, &empty_tv, &lc);
-}
-
-/*++++++++++++++++++++++++++++++++++++++
-  Scan down into the selbri inside a sumti_tail construction.
-
-  TreeNode *x
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-process_sumti_tail_1a(TreeNode *x)
+}/*}}}*/
+static void process_sumti_tail_1a(TreeNode *x)/*{{{*/
 {
+  /* Scan down into the selbri inside a sumti_tail construction.  */ 
+
   TreeNode *c;
   LinkConv lc;
 
@@ -1880,17 +1561,12 @@
   assert(c);
   lc_init(&lc);
   process_selbri_args(c, &empty_tv, &empty_tv, &lc);
-}
-
-
-/*++++++++++++++++++++++++++++++
-  Process selbri in the context of sumti.  Allows conversions to be
-  handled with full generality.
-  ++++++++++++++++++++++++++++++*/
-
-static void
-scan_for_selbri_in_sumti(TreeNode *x)
+}/*}}}*/
+static void scan_for_selbri_in_sumti(TreeNode *x)/*{{{*/
 {
+  /* Process selbri in the context of sumti.  Allows conversions to be
+    handled with full generality.  */
+
   int nc, i;
   struct nonterm *nt;
   TreeNode *c;
@@ -1910,17 +1586,11 @@
       scan_for_selbri_in_sumti(c);
     }
   }
-}
-
-/*++++++++++++++++++++++++++++++++++++++
-  Check whether there are linked sumti and add property
-
-  TreeNode *tu1 The tanru_unit_1 parse node
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-check_tu1_for_links(TreeNode *tu1)
+}/*}}}*/
+static void check_tu1_for_links(TreeNode *tu1)/*{{{*/
 {
+/* Check whether there are linked sumti and add property */
+
   TreeNode *la, *tm, *lk, *tc, *tcc;
   TermVector tv;
   XTermVector *xtv;
@@ -1989,18 +1659,12 @@
     xtv->vec = new(TermVector);
     *(xtv->vec) = tv;
   }
-}
-
-/*++++++++++++++++++++++++++++++++++++++
-  Find any tanru_unit_1 with linked sumti on it, and build a
-  termvector property to attach to it.
-
-  TreeNode *top Top node of parse tree
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-scan_tu1_phase1(TreeNode *x)
+}/*}}}*/
+static void scan_tu1_phase1(TreeNode *x)/*{{{*/
 {
+  /* Find any tanru_unit_1 with linked sumti on it, and build a termvector
+     property to attach to it.  */
+
   int nc, i;
   struct nonterm *nt;
   TreeNode *c;
@@ -2019,16 +1683,8 @@
     }
   }
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  TreeNode *x
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-scan_tu1_phase2(TreeNode *x)
+/*}}}*/
+static void scan_tu1_phase2(TreeNode *x)/*{{{*/
 {
   int nc, i;
   struct nonterm *nt;
@@ -2068,16 +1724,12 @@
   }
 
 }
-
-/*++++++++++++++++++++++++++++++
-  The top-level operation called from the main program.
-  ++++++++++++++++++++++++++++++*/
-
-void
-terms_processing(TreeNode *top)
+/*}}}*/
+void terms_processing(TreeNode *top)/*{{{*/
 {
   scan_tu1_phase1(top);
   scan_for_sentence_parents(top);
   scan_for_selbri_in_sumti(top);
   scan_tu1_phase2(top);
 }
+/*}}}*/
diff -urN jbofihe-0.36/testing/jvocipra.pl jbofihe-0.37/testing/jvocipra.pl
--- jbofihe-0.36/testing/jvocipra.pl	Thu Jan  1 01:00:00 1970
+++ jbofihe-0.37/testing/jvocipra.pl	Wed Aug  8 22:41:52 2001
@@ -0,0 +1,137 @@
+#!/usr/bin/env perl
+#
+# $Header: /cvs/src/jbofihe/testing/jvocipra.pl,v 1.2 2001/07/10 22:18:06 richard Exp $
+
+# Perl script for chaining jvocu'adju and vlatai together to look for failures on random input.
+
+# Copyright 2001  Richard P. Curnow
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+# 
+
+@gc = ();   # candidate tanru components, one per usable line of ../reduced_gismu
+$| = 1; # autoflush
+open (IN, "<../reduced_gismu") or die "Cannot open ../reduced_gismu: $!";
+while (<IN>) {
+    chomp;
+    m{^([^ \t]+)[ \t]}o or next; # no leading word: skip rather than reuse a stale $1
+    $word = $1;
+    $word =~ s/\'/\\'/go; # backslash-escape every apostrophe (word goes on a shell command line)
+    push (@gc, $word);
+}
+close (IN);
+
+srand; # NOTE: rand() is not called below; all random numbers come from Rng::rng
+
+$ngc = @gc or die "No words read from ../reduced_gismu"; # an empty list would make the picker below spin forever
+$kk = 0;
+# Each pass: pick a random tanru, have jvocuhadju build lujvo for it, then check vlatai recovers the tanru.
+main_loop:
+while (1) {
+    ++$kk;
+    if (($kk%500) == 0) {
+        print "$kk\n";
+    }
+    
+    @args = ();
+    $shown_args = 0;
+    $n = 2 + int (6 * &Rng::rng());
+    for $i (1 .. $n) {
+        while (1) {
+            # For final component, can't have a cmavo, unless that cmavo has a
+            # rafsi ending in a vowel.  Don't know that in here, so be
+            # pessimistic.
+            $e = int(&Rng::rng() * $ngc);
+            $w = $gc[$e];
+            
+            # Lujvo canonicaliser in vlatai reverses brody to brodu always, so
+            # lots of bogus failures will occur unless the other four are avoided.
+            next if ($w =~ /brod[aeio]/o);
+            
+            if (($i < $n) || (length ($w) == 5)) {
+                push (@args, $gc[$e]);
+                last;
+            }
+        }
+    }
+
+    @lujvo = (); # alleged lujvo for this tanru
+    $args = join(" ", @args);
+    #    print $args."\n";
+
+    open (IN, "../jvocuhadju -l -a $args 2>&1 |") or die "Cannot start ../jvocuhadju: $!";
+    while (<IN>) {
+        chomp;
+        if (/unmatched/io) {
+            # Due to picking a cmavo as the last tanru component where that cmavo
+            # has no rafsi ending in a vowel.
+            next main_loop;
+        }
+        
+        if (/^[ \t]+[0-9]+[ \t]+([^ \t]+)/) {
+            push (@lujvo, $1);
+        }
+    }
+    close (IN);
+
+    open (OUT, ">temp_vt.in") or die "Cannot write temp_vt.in: $!";
+    for $i (@lujvo) {
+        print OUT "$i\n";
+    }
+    close (OUT);
+        
+    open (IN, "../vlatai -el < temp_vt.in |") or die "Cannot start ../vlatai: $!";
+    while (<IN>) {
+        chomp;
+        m{^([^:]+):([^:]+):(.+[^\s])\s+\[([^]]+)\]\s*$}o or die "Unmatched $_";
+        $a = $1;
+        $b = $2;
+        $c = $3;
+        $d = $4;
+        $b =~ s/^ +//o;
+        $b =~ s/ +$//o;
+        $c =~ s/^ +//o;
+        $c =~ s/ +$//o;
+        $d =~ s/\+/ /go;
+        $d =~ s/\'/\\'/go; # same escaping as applied to the words in @gc, so $d is comparable with $args
+        if (($b ne "lujvo") ||
+            ($c =~ / /) ||
+            ($d ne $args)) {
+            print "$args\n" unless ($shown_args);
+            $shown_args = 1;
+            print "###$b:$c\n";
+        }
+    }
+    close(IN);
+
+}
+
+package Rng;
+BEGIN {
+    $rng_open = 0; # becomes 1 once /dev/urandom has been opened on RNG
+}
+# rng(): return a float in [0,1) made from 4 bytes of /dev/urandom (opened lazily on first call).
+sub rng {
+    if (!$rng_open) {
+        open (RNG, "</dev/urandom") or die "No /dev/urandom";
+        $rng_open = 1;
+    }
+    # Insist on all 4 bytes: a failed or short read would otherwise unpack to 0/garbage silently.
+    (sysread(RNG, $data, 4) == 4) or die "Short read from /dev/urandom";
+    $x = unpack("L", $data);
+    $x *= 2.3283064365387e-10; # approximately 2**-32: scales the 32-bit value into [0,1)
+    return $x;
+}
+    
+
diff -urN jbofihe-0.36/textblk.c jbofihe-0.37/textblk.c
--- jbofihe-0.36/textblk.c	Mon Mar 26 22:03:09 2001
+++ jbofihe-0.37/textblk.c	Wed Aug  8 22:41:51 2001
@@ -1,5 +1,5 @@
 /***************************************
-  $Header: /cvs/src/jbofihe/textblk.c,v 1.3 1999/10/19 19:59:31 richard Exp $
+  $Header: /cvs/src/jbofihe/textblk.c,v 1.5 2001/05/28 21:30:15 richard Exp $
 
   Driver for producing plain text output, using blocks rather than
   free-flow.
@@ -62,13 +62,7 @@
 static char loj_line[BUFFER_SIZE];
 static char eng_line[BUFFER_SIZE];
 
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-clear_line_buffer(void)
+static void clear_line_buffer(void)/*{{{*/
 {
   int i;
 
@@ -82,14 +76,8 @@
 
   current_width = 0;
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-initialise(void)
+/*}}}*/
+static void initialise(void)/*{{{*/
 {
   int i;
 
@@ -105,23 +93,13 @@
 
   max_width = opt_output_width;
 }
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-write_prologue(void)
+/*}}}*/
+static void write_prologue(void)/*{{{*/
 {
   return;
 }
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-flush_line(void)
+/*}}}*/
+static void flush_line(void)/*{{{*/
 {
   int i;
   
@@ -137,24 +115,14 @@
   }
   fputs("\n", stdout);
   clear_line_buffer();
-}
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-  ++++++++++++++++++++++++++++++++++++++*/
+}/*}}}*/
 
 /* Number of end of lines that are pending.  (These are only inserted
    when we have closed a sequence of close brackets, i.e. before the
    next open bracket or ordinary text.) */
 static int pending_eols = 0;
 
-/*++++++++++++++++++++++++++++++
-  
-  ++++++++++++++++++++++++++++++*/
-
-static void
-clear_eols(void)
+static void clear_eols(void)/*{{{*/
 {
   if (pending_eols > 0) {
 
@@ -168,22 +136,11 @@
     state = ST_OPEN;
     pending_eols = 0;
   }
-}
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  Append a string onto a buffer, and right pad with spaces to bring up
-  to a specific width
-
-  char *src
-
-  char *dbuf
-
-  int wid
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-append_to_width(char *src, char *dbuf, int wid) {
+}/*}}}*/
+static void append_to_width(char *src, char *dbuf, int wid)/*{{{*/
+/* Append a string onto a buffer, and right pad with spaces to bring up
+  to a specific width */
+{
   int len, toadd, i;
   char buffer[BUFFER_SIZE];
   len = strlen(src);
@@ -194,14 +151,9 @@
   buffer[toadd] = '\0';
   strcat(dbuf, src);
   strcat(dbuf, buffer);
-}
-
-/*++++++++++++++++++++++++++++++
-  Push current block onto line buffer
-  ++++++++++++++++++++++++++++++*/
-
-static void
-flush_block(void)
+}/*}}}*/
+static void flush_block(void)/*{{{*/
+/* Push current block onto line buffer */
 {
   int i;
   int max_len, len;
@@ -253,39 +205,20 @@
   tags_used = 0;
   
 }
-
-/*++++++++++++++++++++++++++++++
-  
-  ++++++++++++++++++++++++++++++*/
-
-static void
-set_eols(int eols)
+/*}}}*/
+static void set_eols(int eols)/*{{{*/
 {
   pending_eols += eols;
 }
-
-/*++++++++++++++++++++++++++++++++++++++
-  Do final acts of writing to output file.
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-write_epilog(void)
+/*}}}*/
+static void write_epilog(void)/*{{{*/
+/* Do final acts of writing to output file. */
 {
   flush_block();
   flush_line();
   return;
-}
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  BracketType type
-
-  int subscript
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-write_open_bracket(BracketType type, int subscript)
+}/*}}}*/
+static void write_open_bracket(BracketType type, int subscript)/*{{{*/
 {
   char *brac;
   char *brac1, *brac2, *brac3;
@@ -348,18 +281,8 @@
   state = ST_OPEN;
 
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  BracketType type
-
-  int subscript
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-write_close_bracket(BracketType type, int subscript)
+/*}}}*/
+static void write_close_bracket(BracketType type, int subscript)/*{{{*/
 {
   char *brac;
   char *brac1, *brac2, *brac3;
@@ -419,18 +342,8 @@
 
   state = ST_CLOSE;
 
-}
-
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  char *text
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-write_lojban_text(char *text)
+}/*}}}*/
+static void write_lojban_text(char *text)/*{{{*/
 {
 
   if (eng_text[0]) {
@@ -442,17 +355,8 @@
   strcat(loj_text, text);
   strcat(loj_text, " ");
 
-}
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  char *text
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-write_special(char *text)
+}/*}}}*/
+static void write_special(char *text)/*{{{*/
 {
   if (!strcmp(text, "$LEFTARROW")) {
     strcat(eng_text, "<-");
@@ -461,16 +365,8 @@
   } else if (!strcmp(text, "$CLOSEQUOTE")) {
     strcat(eng_text,"\"");
   }
-}
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  char *text
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-write_translation(char *text)
+}/*}}}*/
+static void write_translation(char *text)/*{{{*/
 {
   if (text[0] == '$') {
     write_special(text);
@@ -479,62 +375,32 @@
     strcat(eng_text, " ");
   }
 }
+/*}}}*/
 
 /*+  +*/
 static int first_tag;
 
-/*++++++++++++++++++++++++++++++++++++++
-  
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-start_tags(void)
+static void start_tags(void)/*{{{*/
 {
   if (loj_text[0] || eng_text[0]) {
     flush_block();
   }
   clear_eols();
   first_tag = 1;
-}
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-end_tags(void)
+}/*}}}*/
+static void end_tags(void)/*{{{*/
 {
   tags_used++;
-}
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-start_tag(void)
+}/*}}}*/
+static void start_tag(void)/*{{{*/
 {
   if (!first_tag) {
     tags_used++;
   }
   first_tag = 0;
 }
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  char *brivla
-
-  char *place
-
-  char *trans
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-write_tag_text(char *brivla, char *place, char *trans, int brac)
+/*}}}*/
+static void write_tag_text(char *brivla, char *place, char *trans, int brac)/*{{{*/
 {
   char buffer1[256], buffer2[256];
 
@@ -547,10 +413,15 @@
   strcat(tag_text[tags_used], buffer1);
   strcat(tag_text[tags_used], buffer2);
 }
+/*}}}*/
+static void write_partial_tag_text(char *t)/*{{{*/
+{ /* Append t verbatim to the tag text currently being built, tag_text[tags_used]. */
+  strcat(tag_text[tags_used], t);
+}
+/*}}}*/
 
-
-/*+  +*/
-DriverVector text_block_driver = {
+DriverVector text_block_driver = /*{{{*/
+{
   initialise,
   write_prologue,
   write_epilog,
@@ -563,4 +434,5 @@
   end_tags,
   start_tag,
   write_tag_text,
-};
+  write_partial_tag_text
+};/*}}}*/
diff -urN jbofihe-0.36/textout.c jbofihe-0.37/textout.c
--- jbofihe-0.36/textout.c	Mon Mar 26 22:03:09 2001
+++ jbofihe-0.37/textout.c	Wed Aug  8 22:41:51 2001
@@ -1,5 +1,5 @@
 /***************************************
-  $Header: /cvs/src/jbofihe/textout.c,v 1.2 1999/10/06 21:11:15 richard Exp $
+  $Header: /cvs/src/jbofihe/textout.c,v 1.3 2001/05/28 21:30:15 richard Exp $
 
   Driver for producing plain text output from the glosser.
   ***************************************/
@@ -236,6 +236,7 @@
   }
 
   printf("%s", text);
+  fflush(stdout);
 
   state = ST_TEXT;
 }
@@ -287,6 +288,7 @@
   }
 
   state = ST_TEXT;
+  /* fflush(stdout); */
 }
 
 /*+  +*/
@@ -353,8 +355,14 @@
   }
 }
 
-/*+  +*/
-DriverVector textout_driver = {
+static void write_partial_tag_text(char *t)/*{{{*/
+{ /* Print t on stdout exactly as given; nothing is added before or after it. */
+  printf("%s", t);
+}
+/*}}}*/
+
+DriverVector textout_driver = /*{{{*/
+{
   initialise,
   write_prologue,
   write_epilog,
@@ -366,5 +374,7 @@
   start_tags,
   end_tags,
   start_tag,
-  write_tag_text
-};
+  write_tag_text,
+  write_partial_tag_text
+};/*}}}*/
+
diff -urN jbofihe-0.36/translate.c jbofihe-0.37/translate.c
--- jbofihe-0.36/translate.c	Mon Mar 26 22:03:09 2001
+++ jbofihe-0.37/translate.c	Wed Aug  8 22:41:51 2001
@@ -1,5 +1,5 @@
 /***************************************
-  $Header: /cvs/src/jbofihe/translate.c,v 1.32 2001/03/21 22:02:34 richard Exp $
+  $Header: /cvs/src/jbofihe/translate.c,v 1.36 2001/07/23 22:17:01 richard Exp $
 
   Translation functions.
   ***************************************/
@@ -22,6 +22,7 @@
  * 
  *********************************************************************/
 
+/*{{{  Includes */
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -32,6 +33,7 @@
 #include "canonluj.h"
 #include "morf.h"
 #include "dictaccs.h"
+/*}}}*/
 
 extern int show_dictionary_defects;
 
@@ -41,21 +43,19 @@
 
 /* ================================================== */
 
-typedef struct Component {
+typedef struct Component {/*{{{*/
   int start; /* 0 for 1st, pos of + for others (before conversions) */
   int pure_start; /* likewise, but just the gismu/cmavo excl conversions) */
   char text[6];
   int places[6];
-} Component;
+} Component;/*}}}*/
 
-/*+ Forward prototype for place/context translater. +*/
+/*{{{ Forward prototypes for place/context translator and component splitter. */
 extern char *adv_translate(char *w, int place, TransContext ctx);
 static void split_into_comps(char *canon, Component *comp, int *ncomp);
+/*}}}*/
 
-/* ================================================== */
-
-char *
-translate(char *word)
+char * translate(char *word)/*{{{*/
 {
   char *buf;
   char *res;
@@ -70,14 +70,10 @@
     return NULL;
   }
 
-}
-
-/* ================================================== */
-
+}/*}}}*/
+static char * translate_lujvo(char *word, int place)/*{{{*/
 /* Lookup a lujvo that isn't matched in the ordinary dictionary, by
    smashing it into consituent rafsi and glueing these together. */
-static char *
-translate_lujvo(char *word, int place)
 {
   Component comp[32];
   int ncomp;
@@ -111,12 +107,8 @@
 
   return result;
 
-}
-
-/* ================================================== */
-
-char *
-translate_fuivla_prefix(char *w, int place)
+}/*}}}*/
+char * translate_fuivla_prefix(char *w, int place, TransContext ctx)/*{{{*/
 {
   char *canon;
   Component comp[32];
@@ -131,7 +123,7 @@
   for (i=0; i<n_comps; i++) {
     int first = (i == 0);
     int last  = (i == (n_comps-1));
-    trans = adv_translate(comp[i].text, comp[i].places[1], last ? TCX_NOUN : TCX_QUAL);
+    trans = adv_translate(comp[i].text, comp[i].places[1], last ? ctx : TCX_QUAL);
     if (!first) strcat(buffer, "-");
     if (trans) {
       strcat(buffer, trans);
@@ -140,21 +132,19 @@
     }
   }   
   return buffer;
-}
-
+}/*}}}*/
+char * translate_unknown(char *w, int place, TransContext ctx)/*{{{*/
 /* ================================================== */
 /* In principle, it would be nice to pass the word type through from the
    original parsing phase.  However, this isn't general enough, since the word
    to translate may have come from an earlier dictionary lookup.  Hence, call
    morf_scan again. */
 /* ================================================== */
-
-char *
-translate_unknown(char *w, int place)
 {
   static char buf[2048];
   char *ltrans;
   MorfType morf_type;
+  struct morf_xtra xtra;
   char *word_starts[64], **pws, **pwe;
 
   if (strchr(w, '+')) {
@@ -164,7 +154,8 @@
   }
 
   pws = pwe = word_starts;
-  morf_type = morf_scan(w, &pwe);
+
+  morf_type = morf_scan(w, &pwe, &xtra);
 
   switch (morf_type) {
     case MT_BOGUS:
@@ -183,14 +174,17 @@
       return translate_lujvo(w, place);
     case MT_FUIVLA3:
     case MT_FUIVLA3_CVC:
+    case MT_FUIVLA3X:
+    case MT_FUIVLA3X_CVC:
       {
         char *p, *q;
         int count;
-        int hyphen_pos;
-        hyphen_pos = (morf_type == MT_FUIVLA3_CVC) ? 3 : 4;
-        for (p=w, q=buf, count=0; count<hyphen_pos; p++, q++) {
-          *q = *p;
-          if (*p != ',') count++;
+        char *hyphen_pos;
+        hyphen_pos = xtra.u.stage_3.hyph;
+        for (p=w, q=buf; p<hyphen_pos; p++) {
+          if (*p != ',') {
+            *q++ = *p;
+          }
         }
         /* Advance p over the hyphen */
         for (count=0; count < 2; p++) {
@@ -199,7 +193,7 @@
         p--; /* Back up to first real letter of tail portion */
         
         *q = 0;
-        ltrans = translate_fuivla_prefix(buf, place);
+        ltrans = translate_fuivla_prefix(buf, place, ctx);
         if (ltrans) {
           strcpy(buf, ltrans);
         } else {
@@ -216,10 +210,8 @@
       return "[NAME]";
   }
   return "?";
-}
-
-/* ================================================== */
-
+}/*}}}*/
+/*{{{ Comments about 'advanced translate' machinery. */
 /*
 
   This section of the file deals with what I call 'advanced
@@ -314,12 +306,12 @@
 
 
  */
+ /*}}}*/
 
 static char consonants[] = "bcdfghjklmnpqrstvwxz";
 static char vowels[] = "aeiou";
 
-static int
-starts_with_preposition(char *x)
+static int starts_with_preposition(char *x)/*{{{*/
 {
   char *y;
   y = x;
@@ -334,45 +326,33 @@
     return 0;
   }
 
-}
-
-
-/*++++++++++++++++++++++++++++++++++++++
+}/*}}}*/
+static char * basic_trans(char *x)/*{{{*/
+/*
   Remove the '*' markers from a translation (only really applies to
   discrete class words being used as qualifiers, so far).
-
-  static char * basic_trans
-
-  char *x
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static char *
-basic_trans(char *x)
+  */
 {
   char *result, *p, *q;
   result = GETBUF();
   p = x, q = result;
   while (*p) {
-    if (*p != '*') {
+    if (p[0] != '*' || !isspace(p[1])) {
       *q++ = *p;
     }
     p++;
   }
   *q = 0;
   return result;
+}/*}}}*/
+static int is_ok_after_star(char x)/*{{{*/
+{ /* Nonzero if x may directly follow a '*' marker: end of string, whitespace or '/'. */
+  return (!x || isspace((unsigned char) x) || x == '/'); /* cast: isspace() is undefined for negative char values */
 }
-
-/*++++++++++++++++++++++++++++++++++++++
-  Make a word plural, applying standard English rules for when to use
-  -es or -ies instead of plain -s.
-
-  static char * make_plural
-
-  char *x
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static char *
-make_plural(char *x)
+/*}}}*/
+static char * make_plural(char *x)/*{{{*/
+/*  Make a word plural, applying standard English rules for when to use
+  -es or -ies instead of plain -s. */
 {
   char *result;
   int n;
@@ -381,7 +361,7 @@
   result = GETBUF();
 
   star_pos = strchr(x, '*');
-  if (star_pos) {
+  if (star_pos && is_ok_after_star(star_pos[1])) {
     char head[1024];
     char *result2;
     char *p, *q;
@@ -412,26 +392,15 @@
     return result;
   }
 
-}
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  Append "-er" to a word
-
-  static char * append_er
-
-  char *x
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static char *
-append_er(char *x)
+}/*}}}*/
+static char * append_er(char *x)/*{{{*/
 {
   static char result[128];
   int n;
   char *star_pos;
 
   star_pos = strchr(x, '*');
-  if (star_pos) {
+  if (star_pos && is_ok_after_star(star_pos[1])) {
     char head[1024];
     char *result2;
     char *p, *q;
@@ -460,25 +429,15 @@
     return result;
   }
 
-}
-
-/*++++++++++++++++++++++++++++++++++++++
-  Append "-ing" to a word
-
-  static char * append_ing
-
-  char *x
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static char *
-append_ing(char *x)
+}/*}}}*/
+static char * append_ing(char *x)/*{{{*/
 {
   static char result[128];
   int n;
   char *star_pos;
 
   star_pos = strchr(x, '*');
-  if (star_pos) {
+  if (star_pos && is_ok_after_star(star_pos[1])) {
     char head[1024];
     char *result2;
     char *p, *q;
@@ -508,23 +467,8 @@
     return result;
   }
 
-}
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  static char * translate_pattern
-
-  char *w
-
-  int place
-
-  char *suffix
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static char *
-translate_pattern(char *w, int place, char *suffix)
+}/*}}}*/
+static char * translate_pattern(char *w, int place, char *suffix)/*{{{*/
 {
   char *new_start = NULL;
   int swap;
@@ -599,23 +543,8 @@
   }
 
 
-}
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  char * fix_trans_in_context
-
-  char *trans
-
-  TransContext ctx
-
-  char *w1n
-  ++++++++++++++++++++++++++++++++++++++*/
-
-char *
-fix_trans_in_context(char *src, char *trans, TransContext ctx, char *w1n, int found_full_trans)
+}/*}}}*/
+char * fix_trans_in_context(char *src, char *trans, TransContext ctx, char *w1n, int found_full_trans)/*{{{*/
 {
   enum {CL_DISCRETE, CL_SUBSTANCE, CL_ACTOR, CL_PROPERTY, CL_REVERSE_PROPERTY, CL_IDIOMATIC} wordclass;
   char *result;
@@ -788,21 +717,8 @@
     return trans;
   }
 
-}
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  static char * subst_base_in_pattern
-
-  char *trans
-
-  char *base
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static char *
-subst_base_in_pattern(char *trans, char *base)
+}/*}}}*/
+static char * subst_base_in_pattern(char *trans, char *base)/*{{{*/
 {
   char *result = GETBUF();
   char *p, *q, *r;
@@ -838,22 +754,13 @@
   }
   *q = 0;
   return result;
-}
-
-/*++++++++++++++++++++++++++++++++++++++
-  Take a string a+b+c and split it into an array of components
-  separated by + signs.  If any components are se, te etc, bind them
-  as place exchanges to the following component.
-
-  char *canon
-
-  Component *comp
-
-  int *ncomp
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static void
-split_into_comps(char *canon, Component *comp, int *ncomp)
+}/*}}}*/
+static void split_into_comps(char *canon, Component *comp, int *ncomp)/*{{{*/
+/*
+  Take a string a+b+c and split it into an array of components separated by +
+  signs.  If any components are se, te etc, bind them as place exchanges to the
+  following component.
+  */
 {
   int i;
   int nc = 0;
@@ -893,29 +800,8 @@
 
   *ncomp = nc;
 
-}
-
-
-/*++++++++++++++++++++++++++++++++++++++
-  
-
-  static char * lookup_template_match
-
-  int prec
-
-  char *orig
-
-  Comp *comp
-
-  int ncomp
-
-  int place
-
-  TransContext ctx
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static char *
-lookup_template_match(int prec, int suffix, int gather, char *orig, Component *comp, int ncomp, int place, TransContext ctx)
+}/*}}}*/
+static char * lookup_template_match(int prec, int suffix, int gather, char *orig, Component *comp, int ncomp, int place, TransContext ctx)/*{{{*/
 {
   char generic[128]; /* the part that's found in the LHS of the dictionary pattern match */
   char specific[256]; /* the other part of the string */
@@ -977,24 +863,12 @@
   } else {
     return NULL;
   }
-}
-
-/*++++++++++++++++++++++++++++++++++++++
-  Try to match the Lojban word with various standard forms which the
-  dictionary provides.
-
-  char * attempt_pattern_match Return the translation obtained, or
-  NULL if pattern match failed to find anything.
-
-  char *w The Lojban word to pattern match on
-
-  int place The place whose translation is required
-
-  TransContext ctx The context in which the translation is required.
-  ++++++++++++++++++++++++++++++++++++++*/
-
-static char *
-attempt_pattern_match(char *w, int place, TransContext ctx)
+}/*}}}*/
+static char * attempt_pattern_match(char *w, int place, TransContext ctx)/*{{{*/
+/*
+  Try to match the Lojban word with various standard forms which the dictionary
+  provides.  Return NULL on no match.
+  */
 {
   char *canon;
   char *trans;
@@ -1057,20 +931,8 @@
 
   return NULL;
 
-}
-
-/*++++++++++++++++++++++++++++++++++++++
-  'Advanced' translate.
-
-  char * adv_translate Returns the english gloss of the word passed.
-
-  char *w
-
-  TransContext ctx
-  ++++++++++++++++++++++++++++++++++++++*/
-
-char *
-adv_translate(char *w, int place, TransContext ctx)
+}/*}}}*/
+char * adv_translate(char *w, int place, TransContext ctx)/*{{{*/
 {
   char *trans, *trans1;
   char w1n[128];
@@ -1171,7 +1033,7 @@
       if (show_dictionary_defects) {
         fprintf(stderr, "No dictionary entry for [%s], attempting to break up as lujvo\n", w);
       }
-      trans = translate_unknown(w, place);
+      trans = translate_unknown(w, place, ctx);
       if (trans) {
         strcpy(result, trans);
         strcat(result, "??");
@@ -1183,5 +1045,5 @@
     }
   }
 
-}
+}/*}}}*/
 
diff -urN jbofihe-0.36/tree.c jbofihe-0.37/tree.c
--- jbofihe-0.36/tree.c	Mon Mar 26 22:03:09 2001
+++ jbofihe-0.37/tree.c	Wed Aug  8 22:41:51 2001
@@ -1,5 +1,5 @@
 /***************************************
-  $Header: /cvs/src/jbofihe/tree.c,v 1.11 2001/03/09 22:18:57 richard Exp $
+  $Header: /cvs/src/jbofihe/tree.c,v 1.12 2001/06/22 22:16:04 richard Exp $
 
   Functions to manipulate and print the parse tree.
   ***************************************/
@@ -495,7 +495,7 @@
           if (conv == CV_NORMAL) {
             english = translate(x->data.brivla.word);
             if (!english) {
-              english = translate_unknown(x->data.brivla.word, 1);
+              english = translate_unknown(x->data.brivla.word, 1, TCX_NOUN);
             }
             if (english) {
               printf(" /%s/", english);
@@ -530,7 +530,7 @@
             } else {
               english = translate(x->data.brivla.word);
               if (!english) {
-                english = translate_unknown(x->data.brivla.word, 1);
+                english = translate_unknown(x->data.brivla.word, 1, TCX_NOUN);
               }
               if (english) {
                 printf(" /%s/", english);
diff -urN jbofihe-0.36/version.txt jbofihe-0.37/version.txt
--- jbofihe-0.36/version.txt	Mon Mar 26 22:03:09 2001
+++ jbofihe-0.37/version.txt	Wed Aug  8 22:41:51 2001
@@ -1 +1 @@
-char version_string[] = "$Name: V0_36 $";
+char version_string[] = "$Name: V0_37 $";
diff -urN jbofihe-0.36/vlatai.1 jbofihe-0.37/vlatai.1
--- jbofihe-0.36/vlatai.1	Mon Mar 26 22:03:09 2001
+++ jbofihe-0.37/vlatai.1	Wed Aug  8 22:41:51 2001
@@ -3,7 +3,7 @@
 vlatai \- Lojban word analyzer
 .SH SYNOPSIS
 .B vlatai
-[ -v ] [-cr] [ word ]
+[ -v ] [ word ]
 .SH DESCRIPTION
 .I vlatai
 is a program that reads a list of Lojban words from standard input, one word
@@ -22,11 +22,6 @@
 inside jbofihe.  vlatai is not fully supported or documented as a stand-alone
 program.
 .SH OPTIONS
-.TP
-.B -cr
-Allow cultural rafsi within lujvo (defined in section 4.16 of the Reference
-Grammar).  This affects the recognition of lujvo and of fu'ivla (the latter
-since it affects which word forms fail the slinku'i test.)
 .TP
 .B -v
 Verbose; show lots of information about the evolution of the internal state

