# Makefile for two tests built around the `fsa` tool:
#   TEST 1 - check which words of `monosyll` are accepted by the recognizer
#            compiled from the regular expression `syllable`.
#   TEST 2 - run the hyphenation transducer over `eow.stem` and compare the
#            result with the reference file `eow.syll`.
#
# Input files expected in this directory: syllable.pl, hyphenate.pl,
# monosyll, eow.stem, eow.syll.
#
# The first rule (not_accepted) is the default goal.

# Remove a target whose recipe failed, so a truncated output file is never
# mistaken for an up-to-date one.
.DELETE_ON_ERROR:

.PHONY: clean

# Denominators used when reporting percentages.
# NOTE(review): presumably the number of words in monosyll and in eow.stem
# respectively - confirm against the data files. Override on the command
# line if the word lists change, e.g. `make MONOSYLL_TOTAL=6000`.
MONOSYLL_TOTAL ?= 5890
EOW_TOTAL      ?= 12628

######### TEST 1

# Make a list of the words from monosyll that are not accepted by
# syllable.fsa and put the result in not_accepted, then report how many
# words were rejected.
# The first grep keeps only the yes/no verdict lines of accept_results;
# paste pairs each verdict with the corresponding word of monosyll.
not_accepted: accept_results monosyll
	grep -e '^yes$$' -e '^no$$' accept_results | paste monosyll - | grep 'no$$' | cut -f1 > $@
	@wc -l $@ | awk '{print $$1 " words not accepted, " ($$1/$(MONOSYLL_TOTAL) * 100) "%"}'

# Run the list of words monosyll through the recognizer syllable.fsa and put
# the results in accept_results. Only stderr is captured in the target;
# stdout is left on the terminal.
accept_results: syllable.fsa monosyll
	fsa -aa $< < monosyll 2> $@

# Compile the regular expression `syllable` from syllable.pl into
# syllable.fsa.
syllable.fsa: syllable.pl
	fsa -aux $< -r syllable > $@

###### TEST 2: compile hyphenation program and check on list of
###### monomorphemic words

# List every line where the produced hyphenation (column 1) differs from the
# reference hyphenation in eow.syll (column 2), then report the error rate.
hyphen_errors: hyphen.syll eow.syll
	paste hyphen.syll eow.syll | awk '($$1 != $$2) {print $$0}' > $@
	@wc -l $@ | awk '{print $$1 " hyphenation errors, " ($$1/$(EOW_TOTAL) * 100)"%"}'

## FAST but requires hyphenate (see below).
## The binary is built in this directory, so it is invoked as ./hyphenate
## rather than relying on `.` being in PATH.
hyphen.syll: hyphenate eow.stem
	./hyphenate < eow.stem > $@

## SLOW but robust alternative; use it instead of the rule above when
## compiling hyphenate fails:
## hyphen.syll: hyphenate.fsa eow.stem
##	fsa -aa hyphenate.fsa < eow.stem > hyphen.syll

## hyphenate is the C-compiled version of your program.
## Compilation only works for deterministic transducers, and this
## requirement may fail for your program. (Diagnostic: compilation takes very
## long; the unix command `top` shows running processes. If there is a
## sicstus program consuming lots of memory, and growing fast, you are trying
## to compile a non-deterministic transducer.)
## If compilation fails, use the slow hyphen.syll method above.

# Generate C source for the `hyphenate` transducer defined in hyphenate.pl.
hyphenate.c: hyphenate.pl syllable.pl
	fsa debug=1 regex_cache=on -aux hyphenate.pl -r 'hyphenate' | fsa -cc > $@

# Compile the generated C source into the hyphenate executable.
hyphenate: hyphenate.c
	$(CC) $< -o $@

# Compile the regular expression `hyphenate` from hyphenate.pl into
# hyphenate.fsa (used by the slow method).
hyphenate.fsa: hyphenate.pl syllable.pl
	fsa -aux hyphenate.pl -r hyphenate > $@

# Remove everything this Makefile generates. Note that the glob hyphen.*
# removes hyphen.syll and any other file whose name starts with "hyphen.".
clean:
	rm -f syllable.fsa accept_results not_accepted hyphen.* hyphen_errors hyphenate hyphenate.fsa hyphenate.c