/net/aistaff/kleiweg/tgrep2
```sh
cd /var/tmp/lassyconv
discodop treesearch -e tgrep2 --csv -c '/^VNW\[p[re]/ < /^([Mm]ij|[Jj]ij|[Jj]ou|[Zz]ij|[Ww]ij|[Hh]en|[Hh]un)$/' */*.split.mrg.zst > ~/tmp/lassybig_strong.csv
discodop treesearch -e tgrep2 --csv -c '/^VNW\[p[re]/ < /^([Mm]e|[Jj]e|[Zz]e|[Ww]e)$/' */*.split.mrg.zst > ~/tmp/lassybig_weak.csv
```
In:
```xml
<node begin="0" end="3" id="0" rel="top" cat="top">
  <node begin="0" end="2" id="1" rel="--" cat="smain">
    <node begin="0" end="1" id="2" lemma="het" postag="VNW(pers,pron,stan,red,3,ev,onz)" pt="vnw" rel="su" word="Het" frame="het_noun" genus="onz" getal="ev" his="normal" his_1="decap" his_1_1="normal" lcat="np" naamval="stan" pdtype="pron" persoon="3" pos="noun" rnum="sg" root="het" sense="het" special="het" status="red" vwtype="pers" />
    <node begin="1" end="2" id="3" lemma="regenen" postag="WW(pv,tgw,met-t)" pt="ww" rel="hd" word="regent" frame="verb(hebben,sg3,het_subj)" his="normal" his_1="normal" infl="sg3" lcat="smain" pos="verb" pvagr="met-t" pvtijd="tgw" root="regen" sc="het_subj" sense="het-regen" stype="declarative" tense="present" wvorm="pv" />
  </node>
  <node begin="2" end="3" id="4" lemma="." postag="LET()" pt="let" rel="--" word="." frame="punct(punt)" his="normal" his_1="normal" lcat="punct" pos="punct" root="." sense="." special="punt" />
</node>
```
Uit, voorbeeld 1:
(node (begin 0) (end 3) (id 0) (rel top) (cat top)
(node (begin 0) (end 2) (id 1) (rel --) (cat smain)
(node (begin 0) (end 1) (id 2) (lemma het)
(postag VNW_[pers,pron,stan,red,3,ev,onz_]) (pt vnw) (rel su)
(word Het) (frame het_noun) (genus onz) (getal ev)
(his normal) (his_1 decap) (his_1_1 normal) (lcat np)
(naamval stan) (pdtype pron) (persoon 3) (pos noun)
(rnum sg) (root het) (sense het) (special het) (status red)
(vwtype pers) )
(node (begin 1) (end 2) (id 3) (lemma regenen)
(postag WW_[pv,tgw,met-t_]) (pt ww) (rel hd) (word regent)
(frame verb_[hebben,sg3,het_subj_]) (his normal)
(his_1 normal) (infl sg3) (lcat smain) (pos verb)
(pvagr met-t) (pvtijd tgw) (root regen) (sc het_subj)
(sense het-regen) (stype declarative) (tense present)
(wvorm pv) ) )
(node (begin 2) (end 3) (id 4) (lemma .) (postag LET_[_]) (pt let)
(rel --) (word .) (frame punct_[punt_]) (his normal)
(his_1 normal) (lcat punct) (pos punct) (root .) (sense .)
(special punt) ) )
Codering van bijzondere tekens:
| teken | codering |
|---|---|
| ( | _[ |
| ) | _] |
| spatie | __ |
| _ | _- |
Uit, voorbeeld 2:
(top
(--
(cat smain)
(su
(lemma het)
(pt vnw)
(word Het))
(hd
(lemma regenen)
(pt ww)
(word regent)))
(--
(lemma .)
(pt let)
(word .)))