Tests on artificial data with 60% noise

In [1]:
library(adabag)
library(naivebayes)
Loading required package: rpart
Loading required package: caret
Loading required package: lattice
Loading required package: ggplot2
Loading required package: foreach
Loading required package: doParallel
Loading required package: iterators
Loading required package: parallel
In [2]:
# files available in: /net/aistaff/kleiweg/spraak/fa
#
# Both splits are read the same way: tab-separated, first line is the header,
# quoting disabled (quote = ""), and the first column supplies the row names.
#
# stringsAsFactors = TRUE is spelled out on purpose. Since R 4.0.0 read.table()
# keeps strings as character vectors by default, while the adabag models fitted
# further down expect the class label to be a factor. On older R versions this
# is the same as the previous default, so results there are unchanged.
train <- read.table("data060.train", header = TRUE, sep = "\t", quote = "",
                    row.names = 1, stringsAsFactors = TRUE)
test <- read.table("data060.test", header = TRUE, sep = "\t", quote = "",
                   row.names = 1, stringsAsFactors = TRUE)
In [3]:
# Preview the first ten rows of the training data. head() returns at most ten
# rows, whereas indexing with 1:10 pads the result with all-NA rows when the
# data frame is shorter than that.
head(train, n = 10)
    C.Class  C.W1  C.W2  C.W3  C.W4  C.W5  C.W6  C.W7  C.W8  C.W9 C.W11 C.W12 C.W13 C.W14 C.W15 C.W16 C.W17 C.W18 C.W19 C.W20
869       A  1.A1  2.A1  3.A1  4.A1  5.A6  6.C3  7.C4  8.A2  9.A1 11.A2 12.A1 13.B7 14.C3 15.A1 16.B2 17.A1 18.C6 19.A3 20.C3
19        A  1.A1  2.A1  3.B2  4.A1  5.A3  6.A7  7.A5  8.B1  9.A1 11.C5 12.B1 13.C1 14.A1 15.A1 16.A1 17.B4 18.A2 19.A4 20.C3
439       B  1.B1  2.B1  3.A1  4.B1  5.C1  6.B7  7.C1  8.C2  9.B1 11.A2 12.B2 13.B1 14.C1 15.B1 16.C1 17.B3 18.B1 19.B4 20.C2
870       A  1.C3  2.B3  3.C3  4.A1  5.C2  6.A5  7.C4  8.A2  9.A1 11.A1 12.A1 13.A1 14.B1 15.A1 16.A5 17.A1 18.A2 19.A5 20.C2
546       A  1.A1  2.C7  3.B2  4.A1  5.A3  6.C1  7.A2  8.C1  9.A1 11.A1 12.B2 13.A1 14.A1 15.A1 16.A5 17.B7 18.B1 19.C1 20.C2
567       B  1.A1  2.B2  3.B2  4.A1  5.B2  6.B3  7.B6  8.B1  9.B2 11.A1 12.A1 13.C1 14.C1 15.A1 16.C1 17.A1 18.B1 19.A3 20.C1
739       B  1.B4  2.C1  3.C3  4.B1  5.C1  6.B1  7.C4  8.C2  9.A1 11.B3 12.C3 13.B2 14.B1 15.A1 16.B2 17.B6 18.B1 19.B2 20.B1
437       A  1.A1  2.A1  3.A1  4.C2  5.A4  6.A4  7.A2  8.A1  9.A1 11.A1 12.C3 13.A1 14.A1 15.C3 16.C1 17.B4 18.A4 19.A4 20.B1
640       C  1.A1  2.C7  3.C1  4.C2  5.C1  6.C3  7.C3  8.C2  9.B1 11.A1 12.C2 13.B1 14.C3 15.C4 16.A1 17.A1 18.C2 19.C7 20.C1
440       B  1.B5  2.B2  3.B2  4.B1  5.A3  6.B2  7.A5  8.C2  9.C1 11.B4 12.B2 13.B1 14.B1 15.A1 16.B2 17.B7 18.B1 19.B4 20.C1

Bagging (AdaBag)

In [4]:
# Fit a bagged ensemble that predicts C.Class from every other column of the
# training data.
bag <- bagging(C.Class ~ ., data = train)

# Classify the training split and show its accuracy as a percentage; the
# prediction object carries the error rate in its `error` element.
train.bagging <- predict(bag, newdata = train)
(1 - train.bagging[["error"]]) * 100

# Same for the held-out test split.
test.bagging <- predict(bag, newdata = test)
(1 - test.bagging[["error"]]) * 100
97.1111111111111
85

Boosting (AdaBoost)

In [5]:
# Fit a boosted ensemble that predicts C.Class from every other column of the
# training data.
boost <- boosting(C.Class ~ ., data = train)

# Classify the training split and show its accuracy as a percentage; the
# prediction object carries the error rate in its `error` element.
train.boosting <- predict(boost, newdata = train)
(1 - train.boosting[["error"]]) * 100

# Same for the held-out test split.
test.boosting <- predict(boost, newdata = test)
(1 - test.boosting[["error"]]) * 100
100
94

Naive Bayes

In [6]:
# Percentage of predictions that match the reference labels.
#
# obs: vector of predicted labels.
# exp: table holding the reference labels in its "C.Class" column; it is
#      indexed as exp[, "C.Class"], so it needs two dimensions.
#
# Returns a single number: 100 * (number of matches) / length(obs).
score <- function(obs, exp) {
  reference <- exp[, "C.Class"]
  n_correct <- sum(obs == reference)
  100 * n_correct / length(obs)
}

# Fit a naive Bayes classifier that predicts C.Class from every other column
# of the training data.
nb <- naive_bayes(C.Class ~ ., data = train)

# Predict labels for the training split and show the percentage that matches
# the reference labels.
train.nb <- predict(nb, newdata = train)
score(obs = train.nb, exp = train)

# Same for the held-out test split.
test.nb <- predict(nb, newdata = test)
score(obs = test.nb, exp = test)
97.7777777777778
98

simpel.go

In [7]:
# Run the external ./simpel program on the same train/test files and show what
# it prints.
#
# system2() documents `command` as a single string naming the program; its
# arguments belong in `args`. Passing the whole command line as one vector in
# `command` only works by accident of the Unix implementation.
#
# stdout = TRUE and stderr = TRUE capture both streams into the character
# vector `out`, one element per output line.
out <- system2("./simpel", args = c("data060.train", "data060.test"),
               stdout = TRUE, stderr = TRUE)
cat(out, sep = "\n")
Training score:	 98.0%
Testing score:	 98.0%