#!/usr/local/bin/perl

use HTML::Entities;

# First command-line argument.  Kept as the fallback file name for skip()
# for the time before the diamond operator has opened any input.
$f = $ARGV[0];

# Report the text currently held in $_ on STDERR as "file:line: text".
#
# Takes no arguments.  The file name is taken from $ARGV, which the diamond
# operator sets to the file it is reading at the moment, so the report stays
# correct when several input files are given; $f is used only while $ARGV is
# still undefined.  The line number is $., the count of the last-read handle.
# Nothing in the code shown here calls this routine.
sub skip {
    my $file = defined $ARGV ? $ARGV : $f;
    $file = '' unless defined $file;
    print STDERR "$file:$.: $_";
}

# Convert one dictionary item, passed in $_, into word-list lines on STDOUT.
#
# Takes no arguments; the return value is not meaningful.  An item is only
# handled when it begins with "<b>headword</b>".  The text following that
# element is searched for the part-of-speech markers <i>adj</i>, <i>n</i>
# and <i>v</i>.
#
# Lines printed, depending on the headword:
#   * the plural form named in "(<i>pl</i> ...)", if there is one;
#   * for a headword of several words: each word on its own line, leaving
#     out parts containing "." or "/", single characters and pure numbers;
#   * for a single word with a plural: the bare word;
#   * for a single word with a language flag such as [A]: the bare word, or
#     "word/U" when the flag is A and the item is marked as a noun;
#   * otherwise "word/A", "word/N", "word/V" for each marker found (the bare
#     word when none is found), plus "stem/S" for a verb whose headword lists
#     a second stem in the form "[xxx-/yyy-]".
#
# Headwords that begin or end with "-", "'" or "`", single words containing
# ".", and headwords that are empty after clean-up produce no entry.
sub process_item {

    # Only items that open with a bold headword are of interest.
    return unless /^\<b\>(.*?)\<\/b\>\s*/;
    my $word = $1;
    my $rest = $';    # everything after the headword element

    # Disabled alternative: harvest the words listed after "<i>hence ...</i>".
    # less than 160 words, and too unreliable
#    if (/^\<i\>hence.*?\<\/i\>(.*?)\<\/font\>/i) {
#	$i = $1;
#	$i =~ s/\betc\b/ /g;
#	$i =~ s/\b(see|also)\b.*//i;
#
#	$i =~ s/eIectrochimic/electrochimic/;
#
#	for $j (split /[^-a-zA-Z\300-\326\330-\366\370-\377]+/, $i) {
#	    next if ($j =~ /^-/ || $j =~ /-$/);
#	    $j =~ s/-+/\n/g;
#	    $j =~ s/^\n+//;
#	    $j =~ s/\n+$//;
#	    $j =~ s/\n\n+/\n/g;
#	    print "$j\n" if ($j ne '');
#	}
#	return;
#    }

    # remove [ ] from first word
    $word =~ s/^\[(.*?)\]/$1/;

    # is there a language flag?  (undef when there is none)
    my ($language) = $word =~ /\[(A|F|G|I|J|L|NL|P)\]/;

    # is there a plural?  It is emitted right away, before the headword.
    my ($plural) = $word =~ /\(\<i\>pl\<\/i\>\s*([a-zA-Z\300-\326\330-\366\370-\377]+)/;
    print "$plural\n" if defined $plural;

    # is this a verb with a second stem?  Both stay undef if it is not.
    my ($first_stem, $second_stem);
    if ($rest =~ m!\<i\>v\</i\>!) {
        ($first_stem, $second_stem) = $word =~ m!\[-?([a-z]+)-/-?([a-z]+)-!;
    }

    # remove remaining [..] and (..)
    $word =~ s/\s*[\[\(].*//g;

    # remove superscript 1, 2, 3
    $word =~ s/[\262\263\271]//g;

    # remove hyphenation marks
    $word =~ s/\267//g;

    # substitute some illegal characters
    $word =~ s/\222/\'/g;
    $word =~ s/\221/\`/g;

    # skip words that begin or end with "-" "'" "`"
    return if ($word =~ /^[-\'\`]/ || $word =~ /[-\'\`]$/);

    # remove "!" "?" ","
    $word =~ s/[!?,]/ /g;
    $word =~ s/^\s+//;
    $word =~ s/\s+$//;

    # nothing left after clean-up: do not emit an empty entry
    return if $word eq '';

    # split at space. then unmodified output, unless dot or only one letter.
    # Split on runs of whitespace: replacing "," by a blank above can leave
    # two blanks in a row, which must not turn into an empty output line.
    if ($word =~ /\s/) {
        for my $part (split /\s+/, $word) {
            print "$part\n"
                unless ($part =~ /[\.\/]/ || $part =~ /^.$/ || $part =~ /^\d+$/);
        }
        return;
    }

    return if ($word =~ /\./);

    if (defined $plural) {
        print "$word\n";
        return;
    }

    if (defined $language) {
        # Only nouns flagged [A] receive the /U flag.
        if ($language eq 'A' && $rest =~ m!\<i\>n\</i\>!) {
            print "$word/U\n";
        } else {
            print "$word\n";
        }
        return;
    }

    # One flagged entry per part of speech; the bare word if none is given.
    my $flagged = 0;
    if ($rest =~ m!\<i\>adj\</i\>!) {
        print "$word/A\n";
        $flagged = 1;
    }
    if ($rest =~ m!\<i\>n\</i\>!) {
        print "$word/N\n";
        $flagged = 1;
    }
    if ($rest =~ m!\<i\>v\</i\>!) {
        print "$word/V\n";
        $flagged = 1;
    }
    print "$word\n" unless $flagged;

    # Second stem: swap the last occurrence of the first stem for the second
    # one and drop the final "r"; emitted only when both steps succeed.
    if (defined $first_stem) {
        if ($word =~ s/(.*)\Q$first_stem\E/$1$second_stem/) {
            print "$word/S\n" if $word =~ s/r$//;
        }
    }
    return;
}


# Main pass over the input named on the command line (or STDIN).
# Each line has its HTML entities decoded.  Lines that start with
# '<li><p align="left">' directly followed by a <b>...</b> element lose that
# wrapper and are cut at every <br>; each piece is handed to process_item
# through $_.  All other lines are ignored.
while (my $raw = <>) {

    my $entry = decode_entities($raw);

    # Drop the list-item wrapper in place; the look-ahead keeps the headword.
    next unless $entry =~ s!^<li><p align="left">(?=<b>.*?</b>)!!;

    for (split /\s*\<br\>\s*/i, $entry) {
        process_item();
    }
}

# Append the extra words stored after __END__: every whitespace character is
# removed from each line, and lines that end up empty are not printed.
while (my $extra = <DATA>) {
    $extra =~ s/\s+//g;
    next if $extra eq '';
    print "$extra\n";
}


__END__
illas
illos
los
mi
tu
su
lor
mie
tue
sue
lore
mio
tuo
suo
loro
mia
tua
sua
lora
mies
tues
sues
lores
mios
tuos
suos
loros
mias
tuas
suas
loras
iste
ista
isto
ille
illa
illo

del

raper/V

Gode

pence
pennies

abelian/A
acidificabile/A
acumetria/N
acumetric/A
acumetro/N
acustoelectric/A
acutifoliate/A
additionator/N
adhesivo/N
ad
patres
adventismo/N
adventista/A
adventista/N
aguliettero/N
angelisar/V
ambiophonia/N
amusia/N
angustirostros
anticholeric/A
anticrepusculo/N
antiherpetico/N
antipersonal/A
appellator/N
areniforme/A
audiologista/N
axiomatisar/V
axis/N
betascopio/N
borosilicato/N
brevifoliate/A
brochetta/N
canadianismo/N
caramelisar/V
caramelisation/N
carrettero/N
centralisator/A
chance/N
cibari/A
cladonia/N
cleptomane/A
coassociato/N
codiffusion/N
codonatario/N
coelector/N
coimperator/N
colegatario/N
collectar/V
colonisator/A
comprensibilitate/N
confervaceas
conformista/A
congelabile/A
congelabilitate/N
contraarbitrage/N
contraeffortio/N
convulsar/V
cooperator/A
coordinatographo/N
coparticipante/A
coparticipante/N
coparticipar/V
cornic/A
coteste/N
cryoelectronica/N
cupetta/N
cystocele/N
cytogenic/A
cytolytic/A
dantologo/N
dation/N
decadactyle/A
demoniaco/N
denaturation/N
denivellamento/N
denivellar/V
denivellation/N
dicibile/A
discrescimento/N
disossamento/N
divise/A
dodecandre/A
echolocation/N
ellipsometro/N
endothelial/A
enuretico/N
epicontinental/A
epitaxia/N
epitaxial/A
estimator/N
extramarginal/A
extratemporal/A
fardator/N
fecundabile/A
fecundabilitate/N
filibusteria/N
flabellifere/A
florettista/N
floricole/A
forte-piano/N
franco-belge/A
franco-canadian/A
franco-german/A
franco-italian/A
fricativo/N
frictional/A
frugivoro/N
fuliginositate/N
fumivoro/N
furunculose/A
furunculoso/N
gallicisar/V
gallicisation/N
gallomane/A
gallomano/A
gemmipare/A
geoacustic/A
geopolitic/A
glottitis/N
glumella/N
granulositate/N
gymnopode/A
gymnoptere/A
halation/N
haptonomo/N
haptonomista/N
haspar/V
heliothermometro/N
hendecagyn/A
hendecandre/A
heroinomania/N
hexamotor/A
hexaphylle/A
hispanophobe/A
hispanophobia/N
hispanophobo/N
homogenisation/N
humectabile/A
humectabilitate/N
hydatitis/N
hydrophono/N
icterico/N
ignivore/A
ignivoro/N
illiquiditate/N
imbalsamator/N
inaugurator/A
incitator/N
incommodante/A
inequation/N
inexecutate/A
influentiation/N
inhabitate/A
injunctive/A
inoculabilitate/N
inoxydabilitate/N
in
petto
insensibilisation/N
instinctivitate/N
intergruppo/N
interjacente/A
intermodulation/N
intimidator/N
inundabile/A
invocator/N
ionisante/A
irreprehensibilitate/N
isobatha/N
isochrome/A
isoscelia/N
jeton
justificante/A
juxtalinear/A
kleptomane/A
lactoscopia/N
larviforme/A
latimane/A
latirostre/A
leccator/N
legator/N
leggings
libatorio/N
lignifere/A
lignositate/N
longimane/A
mammitis/N
menaciante/A
methanal/N
microfissura/N
microminiaturisation/N
micromodulo/N
minimicrophono/N
miscomprension/N
multifoliate/A
multiungulate/A
mutilante/A
myrialitro/N
mytilin/A
nanobacterio/N
nanoelectronica/N
nanoprocessor/N
neoceltic/A
neochristianismo/N
nephrographia/N
nephrolithiase
nephrolithiasis
nervifoliate/A
nevroptere/A
nickelator/N
non-interventionista/A
nepenthaceas
nord/A
nosocomial/A
nitrificante/A
numismatographia/N
numismatographo/N
obtusifoliate/A
odontitis/N
oligopolista/N
orthodonte/A
ossiforme/A
ostreiforme/A
paridigitate/A
pentadelphe/A
pentaphylle/A
phallicismo/A
phlebotomista/N
photozincographia/N
picoprocessor/N
pinnifere/A
plastificator/N
pneumocele/N
pneumonalgia/N
polymetallismo/N
polypolio/N
pomiforme/A
positionamento/N
positionar/V
positionator/N
precautional/A
protogyn/A
protogynia/N
puriforme/A
quadricorne/A
quadriflor/A
quadrilobate/A
quasi-renta/N
rastrellata/N
reappunctamento/N
reappunctar/V
recargamento/N
recunear/V
rediscontabile/A
reinocular/V
reparabilitate/N
repavir/V
repesar/V
resonation/N
restaurabilitate/N
reutilisar/V
revoltante/A
rexismo/N
rexista/N
rollmops
rostriforme/A
rubirostre/A
rush/N
sabir/N
salsolaceas
sarcocele/N
scalariforme/A
scleradenitis/N
scorbutico/N
scrotocele/N
sensorimetria/N
sensorimetric/A
separabilitate/N
serrirostros/N
sitologia/N
sitiologia/N
somatopsychic/A
spermatocele/N
spiniforme/A
splenotomia/N
stentorphono/N
stereomicroscopia/N
stomatorrhagia/N
subcapitalisar/V
subcircumsciption/N
subcollector/N
subcomic/A
subcommissario/N
subfermentation/N
subharmonic/A
subpression/N
sud/A
supercapitalisar/V
superchip
superchips
synallagmatic/A
synergic/A
tardiflor/A
tenuicorne/A
tenuicoste/A
tetracephale/A
thermocompressor/N
thermoelectronic/A
thermologia/N
thermomigration/N
thermomolecular/N
thermophono/N
thermoresistente/A
tracheocele/N
tussitor/N
ultra-pur/A
unguligrade/A
unguligrado/N
uniangular/A
unimane/A
unimotor/A
vermilingue/A
viriditate/N
vitrificabilitate/N
xylogravure/N
zoopathologia/N
zootomista/N
