package main import ( "os" "strconv" "github.com/jbowtie/gokogiri" "github.com/jbowtie/gokogiri/xml" "github.com/rug-compling/alpinods" ) type docType struct { Alpino *Alpino_ds Filename string Varroot []interface{} Indexed []*NodeType } var noNode = &NodeType{ NodeAttributes: alpinods.NodeAttributes{ Begin: -1, End: -1, ID: -1, }, Node: []*NodeType{}, } type Alpino_ds struct { Node *NodeType Sentence *sentType } type sentType struct { Sent string SentId string } type NodeType struct { alpinods.NodeAttributes Node []*NodeType Parent *NodeType AxChildren []interface{} AxDescendantsOrSelf []interface{} } func integer(s string, i int) int { i2, err := strconv.Atoi(s) if err == nil { return i2 } return i } func makeNode(el xml.Node) *NodeType { node := &NodeType{ NodeAttributes: alpinods.NodeAttributes{ Begin: integer(el.Attr("begin"), -1), Cat: el.Attr("cat"), Graad: el.Attr("graad"), ID: integer(el.Attr("id"), -1), Index: integer(el.Attr("index"), 0), Lemma: el.Attr("lemma"), Pt: el.Attr("pt"), Word: el.Attr("word"), }, Node: make([]*NodeType, 0), } nodelist, err := el.Search("./node") if err == nil { for _, n := range nodelist { node.Node = append(node.Node, makeNode(n)) } } return node } func parse(filename string) (*docType, error) { b, err := os.ReadFile(filename) if err != nil { return nil, err } pp, err := gokogiri.ParseXml(b) if err != nil { return nil, err } pproot := pp.Root() rootnode, err := pproot.Search("./node") if err != nil { return nil, err } sentence, err := pproot.Search("./sentence") if err != nil { return nil, err } var sent, sentid string if len(sentence) > 0 { sent = sentence[0].Content() sentid = sentence[0].Attr("sentid") } alpino := Alpino_ds{ Node: makeNode(rootnode[0]), Sentence: &sentType{ Sent: sent, SentId: sentid, }, } pp.Free() doc := &docType{ Alpino: &alpino, Filename: filename, Varroot: []interface{}{alpino.Node}, Indexed: []*NodeType{}, } inspect(doc) return doc, nil } func inspect(q *docType) { var walk func(*NodeType) walk = func(node *NodeType) { for _, n := range node.Node { n.Parent = node walk(n) } node.AxChildren = make([]interface{}, 0) node.AxDescendantsOrSelf = make([]interface{}, 1) node.AxDescendantsOrSelf[0] = node for _, n := range node.Node { node.AxChildren = append(node.AxChildren, n) node.AxDescendantsOrSelf = append(node.AxDescendantsOrSelf, n.AxDescendantsOrSelf...) // niet n } if node.Index > 0 && (node.Cat != "" || node.Pt != "") { for node.Index >= len(q.Indexed) { q.Indexed = append(q.Indexed, nil) } q.Indexed[node.Index] = node } } walk(q.Alpino.Node) q.Alpino.Node.Parent = noNode }