/* Zoek een enkel bestand uit de treebank op in Sonar500 */ package main import ( "encoding/xml" "fmt" "github.com/pebbe/util" "io/ioutil" "labix.org/v2/mgo" "labix.org/v2/mgo/bson" "os" "path" "regexp" "strings" ) var ( servers = "127.0.0.1:27047" ) type Alpino struct { Sentence string `xml:"sentence"` } type Item struct { I string S string K string W string } func main() { noalpha := regexp.MustCompile("[^a-zA-Z]+") strict := false if os.Args[1] == "-s" { strict = true os.Args = append(os.Args[:1], os.Args[2:]...) } session, err := mgo.Dial(servers) util.CheckErr(err) defer session.Close() basename := path.Base(os.Args[1]) basename = basename[:strings.LastIndex(basename, "-")] data, err := ioutil.ReadFile(os.Args[1]) util.CheckErr(err) v := Alpino{} util.CheckErr(xml.Unmarshal(data, &v)) fmt.Println(v.Sentence) text := v.Sentence text = noalpha.ReplaceAllString(text, "") text = strings.ToLower(text) kort := text if len(kort) > 100 { kort = kort[:100] } collection := session.DB("sonar500").C(basename) idxs, _ := collection.Indexes() if len(idxs) < 3 { fmt.Println(" ERROR no database") return } var query *mgo.Query if strict { query = collection.Find(bson.M{"k": kort, "s": text, "w": v.Sentence}) } else { query = collection.Find(bson.M{"k": kort, "s": text}) } //query = query.Select(bson.M{"i": true, "w": true}) var item Item iter := query.Iter() for iter.Next(&item) { fmt.Printf(" %s\n %s\n", item.I, item.W) } }