#!/usr/bin/env python3

from lxml import etree as ET

import sys

def terminals(node, rootnode):
    """Return the terminal nodes below ``node``, following index references.

    A terminal is a ``node`` element with a non-empty ``pt`` attribute.  An
    element that matches the search but has no (or an empty) ``pt`` is
    treated as a reference: it is replaced by the terminals of the first
    element under ``rootnode`` that has the same ``index`` value together
    with a ``pt`` or ``cat`` attribute.

    Args:
        node: element whose subtree (itself included) is searched.
        rootnode: element under which referenced antecedents are looked up.

    Returns:
        A list of elements in the order they are encountered.  Each ``id``
        attribute value occurs at most once; the first occurrence is kept.

    A reference whose index has no antecedent under ``rootnode`` is reported
    on stderr and skipped, rather than aborting with ``IndexError``.
    """
    result = []
    seen_ids = set()
    for n in node.xpath('descendant-or-self::node[@pt or (@index and not(@cat or @pt))]'):
        if n.get('pt', '') != "":
            found = [n]
        else:
            index = n.get('index')
            # Bind the index as an XPath variable instead of pasting it into
            # the expression, so an unusual value cannot break the query.
            antecedents = (
                rootnode.xpath('.//node[@index=$idx and (@pt or @cat)]', idx=index)
                if index is not None
                else []
            )
            if not antecedents:
                sys.stderr.write(
                    "warning: no antecedent for index {!r}; reference skipped\n".format(index))
                continue
            found = terminals(antecedents[0], rootnode)
        # The same terminal can be reached both directly and via a reference,
        # so de-duplicate on the id attribute.
        for t in found:
            key = t.get('id')
            if key not in seen_ids:
                seen_ids.add(key)
                result.append(t)
    return result

def deste(node):
    """Find ``du`` nodes below ``node`` holding a repeated comparative pattern.

    A descendant ``node`` element with ``cat="du"`` is selected when it has
    more than one descendant that has both

    * a child whose ``lemma`` is "hoe" or "deste", or which itself has one
      child with lemma "des" and one with lemma "te", and
    * a child with ``graad="comp"``.

    Returns the result of the XPath query (the matching elements).
    """
    query = '''.//node[@cat="du"
            and
            count(.//node[node[@lemma="hoe" or @lemma="deste"
                               or
                               (node[@lemma="des"] and node[@lemma="te"])]
                          and
                          node[@graad="comp"]])>1]'''
    return node.xpath(query)

def processFile(filename):
    """Print every innermost ``deste`` match found in one XML file.

    The file is parsed, ``deste`` is run on its top ``node`` element, and
    matches that themselves contain a further match are dropped.  For each
    remaining match this writes to stdout: the ``sentid`` of the
    ``sentence`` element, a newline, four spaces, the match's ``id``, a tab,
    then the ``word`` of each terminal (ordered by integer ``begin``), each
    followed by a space, and a final newline.
    """
    tree = ET.parse(filename)
    rootnode = tree.find('node')
    sentid = tree.find('sentence').get('sentid')

    # Keep only matches that have no nested match of their own.
    innermost = [candidate for candidate in deste(rootnode) if not deste(candidate)]

    out = sys.stdout
    for match in innermost:
        out.write("{}\n    {}\t".format(sentid, match.get('id')))
        ordered = sorted(terminals(match, rootnode),
                         key=lambda terminal: int(terminal.get('begin')))
        for terminal in ordered:
            out.write(terminal.get('word') + " ")
        out.write("\n")


def main():
    """Run processFile on every path given on the command line, in order."""
    for name in sys.argv[1:]:
        processFile(name)


# Only process the command line when executed as a script, so that importing
# this module has no side effects.
if __name__ == "__main__":
    main()
