// Command-line tool that scans parsed-sentence XML files for "du" nodes
// containing more than one comparative correlative (hoe/deste/des te ...
// combined with a graad="comp" sibling) and prints the words they span.
package main

import (
	"fmt"
	"os"
	"sort"
	"strconv"

	"github.com/jbowtie/gokogiri"
	"github.com/jbowtie/gokogiri/xml"
	"github.com/pebbe/util"
)

// x is shorthand for util.CheckErr; it is called on every error that this
// program cannot recover from.
// NOTE(review): presumably CheckErr reports the error and terminates the
// program when err is non-nil — confirm against github.com/pebbe/util.
var x = util.CheckErr

// main processes every file named on the command line, in order.
func main() {
	for _, filename := range os.Args[1:] {
		processFile(filename)
	}
}

// terminals returns the nodes carrying a "pt" attribute that are found at or
// below node, in the order the search yields them.
//
// A node that has an "index" attribute but neither "cat" nor "pt" is treated
// as a reference: it is resolved by looking up, anywhere below rootnode, the
// node with the same "index" that does have "pt" or "cat", and the terminals
// of that node are included instead.
//
// Nodes are deduplicated on their "id" attribute, so a terminal that is
// reachable both directly and through a reference appears only once.
func terminals(node, rootnode xml.Node) []xml.Node {
	var result []xml.Node
	seen := make(map[string]bool)

	// add appends n unless a node with the same id was already collected.
	add := func(n xml.Node) {
		id := n.Attr("id")
		if seen[id] {
			return
		}
		seen[id] = true
		result = append(result, n)
	}

	nodelist, err := node.Search(`./descendant-or-self::node[@pt or (@index and not(@cat or @pt))]`)
	x(err)
	for _, n := range nodelist {
		if n.Attr("pt") != "" {
			add(n)
			continue
		}

		// Index-only node: find the node it refers to.
		antecedents, err := rootnode.Search(`.//node[@index="` + n.Attr("index") + `" and (@pt or @cat)]`)
		x(err)
		if len(antecedents) == 0 {
			// Dangling reference: nothing to resolve. Skip it rather than
			// panic on an out-of-range index.
			continue
		}
		for _, m := range terminals(antecedents[0], rootnode) {
			add(m)
		}
	}
	return result
}

// deste returns every descendant of node with cat="du" that contains more
// than one descendant node having both
//   - a child whose lemma is "hoe" or "deste", or a child that itself has
//     children with lemma "des" and lemma "te", and
//   - a child with graad="comp".
func deste(node xml.Node) []xml.Node {
	result, err := node.Search(` .//node[@cat="du" and count(.//node[node[@lemma="hoe" or @lemma="deste" or (node[@lemma="des"] and node[@lemma="te"])] and node[@graad="comp"]])>1]`)
	x(err)
	return result
}

// processFile parses the XML file filename, selects the innermost nodes
// matched by deste (matches that contain no further match themselves), and
// prints for each one the sentence id, the node id and the words of its
// terminals ordered by their "begin" attribute.
//
// A document without a top-level <node> element is reported on stderr and
// skipped.
func processFile(filename string) {
	b, err := os.ReadFile(filename)
	x(err)

	pp, err := gokogiri.ParseXml(b)
	x(err)
	defer pp.Free()

	pproot := pp.Root()
	roots, err := pproot.Search("./node")
	x(err)
	if len(roots) == 0 {
		// Without this guard roots[0] would panic on such a document.
		fmt.Fprintf(os.Stderr, "%s: no top-level node element, skipped\n", filename)
		return
	}
	rootnode := roots[0]

	// Keep only matches that have no nested match of their own.
	var results []xml.Node
	for _, node := range deste(rootnode) {
		if len(deste(node)) == 0 {
			results = append(results, node)
		}
	}
	if len(results) == 0 {
		return
	}

	// The sentence id is best-effort: when the lookup fails or finds nothing
	// the id is printed as an empty string.
	var sentid string
	if sentence, err := pproot.Search("./sentence"); err == nil && len(sentence) > 0 {
		sentid = sentence[0].Attr("sentid")
	}

	for _, r := range results {
		fmt.Printf("%s\n %s\t", sentid, r.Attr("id"))

		terms := terminals(r, rootnode)
		sort.Slice(terms, func(i, j int) bool {
			// Atoi errors are deliberately ignored: a missing or non-numeric
			// "begin" is treated as position 0.
			ii, _ := strconv.Atoi(terms[i].Attr("begin"))
			jj, _ := strconv.Atoi(terms[j].Attr("begin"))
			return ii < jj
		})
		for _, t := range terms {
			fmt.Print(t.Attr("word"), " ")
		}
		fmt.Println()
	}
}