In this study, we investigate differences between the English pronunciation of native English speakers and that of Dutch and German speakers. We focus on the articulatory trajectories obtained using electromagnetic articulography and particularly investigate two sound contrasts: /t/-/θ/ and /s/-/ʃ/. Our results show that while German speakers make both sound contrasts adequately, the Dutch speakers do not distinguish them clearly. To further evaluate these results, both a human Dutch listener and an automatic speech recognition (ASR) system classified the pronounced words on the basis of the acoustic recording. Both classifications lined up with the articulatory results. For Dutch speakers, /θ/-words and /s/-words were more frequently recognized as /t/-words and /ʃ/-words, respectively. However, the intended utterance was still recognized in the majority of cases for the Dutch speakers. The perceptual results therefore do not support a complete merger of the sounds in Dutch.
Journal: Submitted to Proceedings of ISSP 2017
Preprint: http://www.martijnwieling.nl/files/ISSP-Wieling.pdf
Keywords: Generalized additive modeling; Tutorial; Articulography; Second language acquisition
## Generated on: August 30, 2017 - 23:39:25
The following commands load the necessary functions and libraries and show the version information.
# install any required package that is missing from the local library
packages <- c("mgcv", "itsadug", "lme4")
if (!all(packages %in% rownames(installed.packages()))) {
  install.packages(packages[!(packages %in% rownames(installed.packages()))])
}
# load required packages
library(mgcv)
library(itsadug)
library(lme4)
# report the R and package versions used for this analysis
R.version$version.string
## [1] "R version 3.4.1 (2017-06-30)"
cat('mgcv version:', as.character(packageVersion('mgcv')))
## mgcv version: 1.8.18
cat('itsadug version:', as.character(packageVersion('itsadug')))
## itsadug version: 2.2.4
cat('lme4 version:', as.character(packageVersion('lme4')))
## lme4 version: 1.1.12
The following shows the columns of the full dataset and their explanation.
# Fetch each articulography data file only when no local copy exists yet,
# then load both files into the workspace.
invisible(Map(
  function(url, destfile) {
    if (!file.exists(destfile)) {
      download.file(url, destfile)
    }
  },
  c('http://www.let.rug.nl/wieling/ISSP2017/datth.rda',
    'http://www.let.rug.nl/wieling/ISSP2017/datsh.rda'),
  c('datth.rda', 'datsh.rda')
))
load('datth.rda')
load('datsh.rda')
The dataset datsh consists of 265599 rows and 10 columns, whereas the dataset datth consists of 223954 rows and 10 columns. Both datasets have the following column names:
# list the column names of the data frame
names(datth)
## [1] "Speaker" "Lang" "Sensor" "Axis" "Trial" "Word" "Sound"
## [8] "Loc" "Time" "Pos"
- Lang: "NL" for Dutch, "DE" for German, or "EN" for English
- Axis: "X", the anterior-posterior position
- Sound: "TH" for words with the dental fricative and "T" for words with the stop in dataset datth; "SH" for words with the post-alveolar fricative and "S" for words with the alveolar fricative in dataset datsh
- Loc: "Start" when the sound occurs at the beginning of the word or "End" when it occurs at the end of the word
datth <- start_event(datth,event=c("Speaker","Trial"))
# Predictors for the TH-vs-T model.
# LangLoc: one factor level per language-by-position combination.
datth$LangLoc <- with(datth, interaction(Lang, Loc))
# 0/1 indicators that are 1 only for TH observations of one language and one
# word position; each is used below as a numeric `by` variable of its own smooth.
datth$IsENTHStart <- as.numeric(with(datth, Lang == "EN" & Sound == "TH" & Loc == "Start"))
datth$IsNLTHStart <- as.numeric(with(datth, Lang == "NL" & Sound == "TH" & Loc == "Start"))
datth$IsDETHStart <- as.numeric(with(datth, Lang == "DE" & Sound == "TH" & Loc == "Start"))
datth$IsENTHEnd <- as.numeric(with(datth, Lang == "EN" & Sound == "TH" & Loc == "End"))
datth$IsNLTHEnd <- as.numeric(with(datth, Lang == "NL" & Sound == "TH" & Loc == "End"))
datth$IsDETHEnd <- as.numeric(with(datth, Lang == "DE" & Sound == "TH" & Loc == "End"))
# Grouping factor for the by-speaker factor smooths (per sound and position).
datth$SpeakerSoundLoc <- with(datth, interaction(Speaker, Sound, Loc))
# Fit the model and report how long fitting took. The order of the smooth
# terms matters: the plotting code selects smooths by position (7 to 12).
system.time(
  th1 <- bam(
    Pos ~ LangLoc + s(Time, by = LangLoc) +
      s(Time, by = IsENTHStart) + s(Time, by = IsENTHEnd) +
      s(Time, by = IsNLTHStart) + s(Time, by = IsNLTHEnd) +
      s(Time, by = IsDETHStart) + s(Time, by = IsDETHEnd) +
      s(Time, SpeakerSoundLoc, bs = "fs", m = 1) +
      s(Time, Word, bs = "fs", m = 1),
    data = datth,
    discrete = TRUE,
    rho = 0.999,
    nthreads = 8,
    AR.start = datth$start.event
  )
)
## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has
## repeated 1-d smooths of same variable.
## user system elapsed
## 872.532 6.548 150.889
# Inspect the residual autocorrelation, then store and display the summary.
acf_resid(th1)
smryth1 <- summary(th1)
smryth1
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Pos ~ LangLoc + s(Time, by = LangLoc) + s(Time, by = IsENTHStart) +
## s(Time, by = IsENTHEnd) + s(Time, by = IsNLTHStart) + s(Time,
## by = IsNLTHEnd) + s(Time, by = IsDETHStart) + s(Time, by = IsDETHEnd) +
## s(Time, SpeakerSoundLoc, bs = "fs", m = 1) + s(Time, Word,
## bs = "fs", m = 1)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.2872 0.1038 -2.768 0.00564 **
## LangLocEN.End 0.1297 0.1701 0.762 0.44601
## LangLocNL.End 0.3559 0.1563 2.277 0.02280 *
## LangLocDE.Start 0.2498 0.1676 1.491 0.13597
## LangLocEN.Start 0.1647 0.1790 0.920 0.35751
## LangLocNL.Start 0.2462 0.1773 1.389 0.16493
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(Time):LangLocDE.End 1.002 1.003 7.464 0.00623 **
## s(Time):LangLocEN.End 3.017 3.368 3.357 0.03084 *
## s(Time):LangLocNL.End 4.531 4.872 2.160 0.10285
## s(Time):LangLocDE.Start 7.318 7.474 4.999 7.34e-06 ***
## s(Time):LangLocEN.Start 6.906 7.096 2.609 0.00530 **
## s(Time):LangLocNL.Start 6.835 7.019 1.431 0.23196
## s(Time):IsENTHStart 7.338 7.592 8.109 1.04e-10 ***
## s(Time):IsENTHEnd 6.053 6.354 2.200 0.05283 .
## s(Time):IsNLTHStart 4.724 5.054 1.249 0.23463
## s(Time):IsNLTHEnd 6.228 6.526 0.529 0.83424
## s(Time):IsDETHStart 8.601 8.755 25.994 < 2e-16 ***
## s(Time):IsDETHEnd 8.558 8.715 17.439 < 2e-16 ***
## s(Time,SpeakerSoundLoc) 2037.213 2481.000 15.669 < 2e-16 ***
## s(Time,Word) 150.959 176.000 133.483 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.468 Deviance explained = 47.3%
## fREML = -2.1637e+05 Scale est. = 3.7864 n = 223954
# Plot the six TH-vs-T smooths of th1 in a 3x2 grid (filled row by row):
# English, German and Dutch, each with the word-start panel followed by the
# word-end panel. Smooths are picked by their position in the model formula.
par(mfrow = c(3, 2), mar = c(5.1, 5.1, 4.1, 2.1))
invisible(Map(
  function(smooth_index, panel_title) {
    # TRUE/FALSE are spelled out because T and F are ordinary variables
    # that can be reassigned.
    plot(th1, select = smooth_index, shade = TRUE, rug = FALSE,
         ylim = c(-0.6, 2.1), main = panel_title,
         cex.lab = 2.5, cex.axis = 2.5, cex.main = 2.5, cex.sub = 2.5,
         ylab = 'Position difference')
    abline(h = 0)  # reference line at zero difference
  },
  c(7, 8, 11, 12, 9, 10),
  c('TH vs T: English (start)', 'TH vs T: English (end)',
    'TH vs T: German (start)', 'TH vs T: German (end)',
    'TH vs T: Dutch (start)', 'TH vs T: Dutch (end)')
))
# Data preparation and model for the SH-vs-S contrast (dataset datsh).
# start_event() is applied per Speaker-Trial combination; the model below reads
# datsh$start.event, which this call is expected to provide.
datsh <- start_event(datsh, event = c("Speaker", "Trial"))
# LangLoc: one factor level per language-by-position combination.
datsh$LangLoc <- interaction(datsh$Lang, datsh$Loc)
# 0/1 indicators that are 1 only for SH observations of one language and one
# word position; each is used below as a numeric `by` variable of its own smooth.
datsh$IsENSHStart <- (datsh$Lang == "EN" & datsh$Sound == "SH" & datsh$Loc == "Start") * 1
datsh$IsNLSHStart <- (datsh$Lang == "NL" & datsh$Sound == "SH" & datsh$Loc == "Start") * 1
datsh$IsDESHStart <- (datsh$Lang == "DE" & datsh$Sound == "SH" & datsh$Loc == "Start") * 1
datsh$IsENSHEnd <- (datsh$Lang == "EN" & datsh$Sound == "SH" & datsh$Loc == "End") * 1
datsh$IsNLSHEnd <- (datsh$Lang == "NL" & datsh$Sound == "SH" & datsh$Loc == "End") * 1
datsh$IsDESHEnd <- (datsh$Lang == "DE" & datsh$Sound == "SH" & datsh$Loc == "End") * 1
# Grouping factor for the by-speaker factor smooths (per sound and position).
datsh$SpeakerSoundLoc <- interaction(datsh$Speaker, datsh$Sound, datsh$Loc)
# Fit the model and report how long fitting took. The order of the smooth
# terms matters: the plotting code selects smooths by position (7 to 12).
system.time(
  sh1 <- bam(
    Pos ~ LangLoc + s(Time, by = LangLoc) +
      s(Time, by = IsENSHStart) + s(Time, by = IsENSHEnd) +
      s(Time, by = IsNLSHStart) + s(Time, by = IsNLSHEnd) +
      s(Time, by = IsDESHStart) + s(Time, by = IsDESHEnd) +
      s(Time, SpeakerSoundLoc, bs = "fs", m = 1) +
      s(Time, Word, bs = "fs", m = 1),
    data = datsh,
    discrete = TRUE,
    rho = 0.999,
    nthreads = 8,
    AR.start = datsh$start.event
  )
)
## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has
## repeated 1-d smooths of same variable.
## user system elapsed
## 1239.356 10.288 216.458
# Inspect the residual autocorrelation, then store and display the summary.
acf_resid(sh1)
smrysh1 <- summary(sh1)
smrysh1
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Pos ~ LangLoc + s(Time, by = LangLoc) + s(Time, by = IsENSHStart) +
## s(Time, by = IsENSHEnd) + s(Time, by = IsNLSHStart) + s(Time,
## by = IsNLSHEnd) + s(Time, by = IsDESHStart) + s(Time, by = IsDESHEnd) +
## s(Time, SpeakerSoundLoc, bs = "fs", m = 1) + s(Time, Word,
## bs = "fs", m = 1)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.01990 0.09964 0.200 0.842
## LangLocEN.End -0.10295 0.14271 -0.721 0.471
## LangLocNL.End -0.15537 0.14255 -1.090 0.276
## LangLocDE.Start 0.17203 0.14287 1.204 0.229
## LangLocEN.Start 0.16462 0.14636 1.125 0.261
## LangLocNL.Start -0.01689 0.15045 -0.112 0.911
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(Time):LangLocDE.End 7.394 7.534 12.784 < 2e-16 ***
## s(Time):LangLocEN.End 7.530 7.669 6.548 1.68e-08 ***
## s(Time):LangLocNL.End 5.816 6.055 1.855 0.096879 .
## s(Time):LangLocDE.Start 6.069 6.311 3.629 0.000718 ***
## s(Time):LangLocEN.Start 6.526 6.739 6.669 2.74e-07 ***
## s(Time):LangLocNL.Start 5.588 5.846 0.678 0.675705
## s(Time):IsENSHStart 6.180 6.509 4.055 0.000187 ***
## s(Time):IsENSHEnd 5.075 5.420 2.061 0.090637 .
## s(Time):IsNLSHStart 3.747 4.062 0.807 0.562415
## s(Time):IsNLSHEnd 5.620 5.949 1.025 0.417433
## s(Time):IsDESHStart 8.497 8.666 20.351 < 2e-16 ***
## s(Time):IsDESHEnd 7.074 7.339 7.621 1.81e-05 ***
## s(Time,SpeakerSoundLoc) 2040.295 2481.000 16.271 < 2e-16 ***
## s(Time,Word) 166.613 194.000 139.157 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.452 Deviance explained = 45.7%
## fREML = -2.9479e+05 Scale est. = 2.8639 n = 265599
# Plot the six SH-vs-S smooths of sh1 in a 3x2 grid (filled row by row):
# English, German and Dutch, each with the word-start panel followed by the
# word-end panel. Smooths are picked by their position in the model formula.
par(mfrow = c(3, 2), mar = c(5.1, 5.1, 4.1, 2.1))
invisible(Map(
  function(smooth_index, panel_title) {
    # TRUE/FALSE are spelled out because T and F are ordinary variables
    # that can be reassigned.
    plot(sh1, select = smooth_index, shade = TRUE, rug = FALSE,
         ylim = c(-2.1, 0.6), main = panel_title,
         cex.lab = 2.5, cex.axis = 2.5, cex.main = 2.5, cex.sub = 2.5,
         ylab = 'Position difference')
    abline(h = 0)  # reference line at zero difference
  },
  c(7, 8, 11, 12, 9, 10),
  c('SH vs S: English (start)', 'SH vs S: English (end)',
    'SH vs S: German (start)', 'SH vs S: German (end)',
    'SH vs S: Dutch (start)', 'SH vs S: Dutch (end)')
))
The following shows the columns of the full dataset and their explanation.
# Download the perception data on first use, then load it; the analysis
# below expects this file to define `perc`.
if (!file.exists('perc.rda')) {
  download.file(
    url = 'http://www.let.rug.nl/wieling/ISSP2017/perc.rda',
    destfile = 'perc.rda'
  )
}
load(file = 'perc.rda')
The dataset perc consists of 6468 rows and 13 columns with the following column names:
# list the column names of the data frame
names(perc)
## [1] "Speaker" "Gender" "BirthYear"
## [4] "Lang" "Trial" "WordActual"
## [7] "WordRecognized" "WordRecognizedASR" "Sound"
## [10] "SoundRecog" "SoundRecogASR" "Correct"
## [13] "CorrectASR"
- Lang: "NL" for Dutch, "DE" for German, or "EN" for English
- Sound: "TH" for words with the dental fricative, "T" for words with the stop, "St" for words with an s instead of the stop or dental fricative, "SH" for words with a post-alveolar fricative, "S" for words with an alveolar fricative
- Correct: 1 if the Dutch listener recognized the word which was pronounced by the speaker, 0 if not
- CorrectASR: 1 if the Google ASR system recognized the word which was pronounced by the speaker, 0 if not
round(mean(2014-perc[perc$Lang=='EN',]$BirthYear),1)
## [1] 25
# Gender counts per language group, one row per unique speaker.
table(unique(perc[perc$Lang == 'EN', c("Speaker", "Gender")])[["Gender"]])
##
## F M
## 14 8
# Mean age in 2014 (birth year subtracted from 2014), rounded to one decimal.
# NOTE(review): the mean runs over all rows of the group, so speakers with more
# rows weigh more; confirm that a per-speaker mean is not what was intended.
round(mean(2014 - perc$BirthYear[perc$Lang == 'NL']), 1)
## [1] 20.7
table(unique(perc[perc$Lang == 'NL', c("Speaker", "Gender")])[["Gender"]])
##
## F M
## 8 12
round(mean(2014 - perc$BirthYear[perc$Lang == 'DE']), 1)
## [1] 23
table(unique(perc[perc$Lang == 'DE', c("Speaker", "Gender")])[["Gender"]])
##
## F M
## 16 11
# TH recognized worse in NL
# Listener correctness for the TH words as a function of speaker language,
# with a random intercept per speaker.
percth <- droplevels(perc[perc$Sound %in% 'TH', ])
m <- glmer(
  Correct ~ Lang + (1 | Speaker),
  data = percth,
  family = 'binomial',
  control = glmerControl(optimizer = "bobyqa")
)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Correct ~ Lang + (1 | Speaker)
## Data: percth
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 1082.4 1102.9 -537.2 1074.4 1241
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.5770 0.2608 0.3328 0.4291 1.1997
##
## Random effects:
## Groups Name Variance Std.Dev.
## Speaker (Intercept) 0.7665 0.8755
## Number of obs: 1245, groups: Speaker, 69
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.2043 0.2485 8.871 < 2e-16 ***
## LangDE -0.1513 0.3327 -0.455 0.649330
## LangNL -1.2735 0.3410 -3.734 0.000188 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) LangDE
## LangDE -0.728
## LangNL -0.721 0.532
# Proportion of each sound category the listener recognized for TH words,
# shown separately for Dutch, German and English speakers.
with(percth[percth$Lang == 'NL', ], round(prop.table(table(Sound, SoundRecog)), 2))
## SoundRecog
## Sound St T TH
## TH 0.14 0.17 0.70
with(percth[percth$Lang == 'DE', ], round(prop.table(table(Sound, SoundRecog)), 2))
## SoundRecog
## Sound St T TH
## TH 0.07 0.07 0.86
with(percth[percth$Lang == 'EN', ], round(prop.table(table(Sound, SoundRecog)), 2))
## SoundRecog
## Sound St T TH
## TH 0.09 0.04 0.88
# Listener correctness for the S words as a function of speaker language,
# with a random intercept per speaker.
percs <- droplevels(perc[perc$Sound %in% 'S', ])
m <- glmer(
  Correct ~ Lang + (1 | Speaker),
  data = percs,
  family = 'binomial'
)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Correct ~ Lang + (1 | Speaker)
## Data: percs
##
## AIC BIC logLik deviance df.resid
## 880.6 901.6 -436.3 872.6 1436
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.0750 0.1493 0.2343 0.3490 1.0307
##
## Random effects:
## Groups Name Variance Std.Dev.
## Speaker (Intercept) 1.053 1.026
## Number of obs: 1440, groups: Speaker, 69
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.5928 0.2912 8.904 <2e-16 ***
## LangDE 0.8431 0.4187 2.014 0.0440 *
## LangNL -0.8972 0.3951 -2.271 0.0232 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) LangDE
## LangDE -0.630
## LangNL -0.711 0.473
# Proportion of each sound category the listener recognized for S words,
# shown separately for Dutch, German and English speakers.
with(percs[percs$Lang == 'NL', ], round(prop.table(table(Sound, SoundRecog)), 2))
## SoundRecog
## Sound S SH
## S 0.81 0.19
with(percs[percs$Lang == 'DE', ], round(prop.table(table(Sound, SoundRecog)), 2))
## SoundRecog
## Sound S SH
## S 0.95 0.05
with(percs[percs$Lang == 'EN', ], round(prop.table(table(Sound, SoundRecog)), 2))
## SoundRecog
## Sound S SH
## S 0.9 0.1
# Listener correctness for the SH words as a function of speaker language,
# with a random intercept per speaker.
percsh <- droplevels(perc[perc$Sound %in% 'SH', ])
m <- glmer(
  Correct ~ Lang + (1 | Speaker),
  data = percsh,
  family = 'binomial'
)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Correct ~ Lang + (1 | Speaker)
## Data: percsh
##
## AIC BIC logLik deviance df.resid
## 457.5 478.0 -224.7 449.5 1256
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.4273 0.1450 0.1836 0.2206 0.4708
##
## Random effects:
## Groups Name Variance Std.Dev.
## Speaker (Intercept) 0.6346 0.7966
## Number of obs: 1260, groups: Speaker, 69
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.6008 0.3561 10.112 <2e-16 ***
## LangDE -0.5327 0.4186 -1.273 0.203
## LangNL -0.1490 0.4698 -0.317 0.751
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) LangDE
## LangDE -0.734
## LangNL -0.641 0.539
# Proportion of each sound category the listener recognized for SH words,
# shown separately for Dutch, German and English speakers.
with(percsh[percsh$Lang == 'NL', ], round(prop.table(table(Sound, SoundRecog)), 2))
## SoundRecog
## Sound S SH
## SH 0.04 0.96
with(percsh[percsh$Lang == 'DE', ], round(prop.table(table(Sound, SoundRecog)), 2))
## SoundRecog
## Sound S SH
## SH 0.06 0.94
with(percsh[percsh$Lang == 'EN', ], round(prop.table(table(Sound, SoundRecog)), 2))
## SoundRecog
## Sound S SH
## SH 0.03 0.97
# ASR responses in the S/SH categories mean another word was recognized;
# mark them as missing. Indexing the column vector directly is a no-op when no
# row matches, whereas assigning through a zero-row data frame subset raises
# "replacement has 1 row, data has 0".
percth$SoundRecogASR[percth$SoundRecogASR %in% c('S', 'SH')] <- NA
# ASR correctness for the TH words as a function of speaker language,
# with a random intercept per speaker.
m <- glmer(CorrectASR ~ Lang + (1 | Speaker), data = percth, family = 'binomial')
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: CorrectASR ~ Lang + (1 | Speaker)
## Data: percth
##
## AIC BIC logLik deviance df.resid
## 1409.1 1429.6 -700.6 1401.1 1241
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.0977 -0.5992 -0.5002 1.1029 2.8116
##
## Random effects:
## Groups Name Variance Std.Dev.
## Speaker (Intercept) 0.2989 0.5467
## Number of obs: 1245, groups: Speaker, 69
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7867 0.1595 -4.933 8.09e-07 ***
## LangDE -0.1526 0.2181 -0.700 0.484210
## LangNL -0.9084 0.2501 -3.632 0.000282 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) LangDE
## LangDE -0.727
## LangNL -0.625 0.460
# Proportion of each sound category the ASR system recognized for TH words,
# shown separately for Dutch, German and English speakers.
with(percth[percth$Lang == 'NL', ], round(prop.table(table(Sound, SoundRecogASR)), 2))
## SoundRecogASR
## Sound St T TH
## TH 0.67 0.14 0.19
with(percth[percth$Lang == 'DE', ], round(prop.table(table(Sound, SoundRecogASR)), 2))
## SoundRecogASR
## Sound St T TH
## TH 0.62 0.07 0.31
with(percth[percth$Lang == 'EN', ], round(prop.table(table(Sound, SoundRecogASR)), 2))
## SoundRecogASR
## Sound St T TH
## TH 0.61 0.06 0.33
# Rebuild the S subset from perc.
percs <- droplevels(perc[perc$Sound %in% c('S'), ])
# ASR responses in the T/St/TH categories mean another word was recognized;
# mark them as missing. Indexing the column vector directly is a no-op when no
# row matches, whereas assigning through a zero-row data frame subset raises
# "replacement has 1 row, data has 0".
percs$SoundRecogASR[percs$SoundRecogASR %in% c('T', 'St', 'TH')] <- NA
# ASR correctness for the S words as a function of speaker language,
# with a random intercept per speaker.
m <- glmer(CorrectASR ~ Lang + (1 | Speaker), data = percs, family = 'binomial')
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: CorrectASR ~ Lang + (1 | Speaker)
## Data: percs
##
## AIC BIC logLik deviance df.resid
## 1041.2 1062.3 -516.6 1033.2 1436
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.2568 0.1821 0.2455 0.3982 1.1722
##
## Random effects:
## Groups Name Variance Std.Dev.
## Speaker (Intercept) 0.99 0.995
## Number of obs: 1440, groups: Speaker, 69
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.64434 0.29105 9.085 < 2e-16 ***
## LangDE 0.02867 0.38225 0.075 0.940218
## LangNL -1.34533 0.38586 -3.487 0.000489 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) LangDE
## LangDE -0.703
## LangNL -0.735 0.529
# Proportion of each sound category the ASR system recognized for S words,
# shown separately for Dutch, German and English speakers.
with(percs[percs$Lang == 'NL', ], round(prop.table(table(Sound, SoundRecogASR)), 2))
## SoundRecogASR
## Sound S SH
## S 0.84 0.16
with(percs[percs$Lang == 'DE', ], round(prop.table(table(Sound, SoundRecogASR)), 2))
## SoundRecogASR
## Sound S SH
## S 0.98 0.02
with(percs[percs$Lang == 'EN', ], round(prop.table(table(Sound, SoundRecogASR)), 2))
## SoundRecogASR
## Sound S SH
## S 0.93 0.07
# Rebuild the SH subset from perc.
percsh <- droplevels(perc[perc$Sound %in% c('SH'), ])
# ASR responses in the T/St/TH categories mean another word was recognized;
# mark them as missing. Indexing the column vector directly is a no-op when no
# row matches, whereas assigning through a zero-row data frame subset raises
# "replacement has 1 row, data has 0".
percsh$SoundRecogASR[percsh$SoundRecogASR %in% c('T', 'St', 'TH')] <- NA
# ASR correctness for the SH words as a function of speaker language,
# with a random intercept per speaker.
m <- glmer(CorrectASR ~ Lang + (1 | Speaker), data = percsh, family = 'binomial')
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: CorrectASR ~ Lang + (1 | Speaker)
## Data: percsh
##
## AIC BIC logLik deviance df.resid
## 1161.6 1182.1 -576.8 1153.6 1256
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.5596 0.2156 0.3561 0.4816 1.6034
##
## Random effects:
## Groups Name Variance Std.Dev.
## Speaker (Intercept) 1.243 1.115
## Number of obs: 1260, groups: Speaker, 69
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.9637 0.2893 6.788 1.13e-11 ***
## LangDE -0.5200 0.3792 -1.371 0.170
## LangNL -0.2110 0.4108 -0.514 0.608
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) LangDE
## LangDE -0.748
## LangNL -0.686 0.520
# Proportion of each sound category the ASR system recognized for SH words,
# shown separately for Dutch, German and English speakers.
with(percsh[percsh$Lang == 'NL', ], round(prop.table(table(Sound, SoundRecogASR)), 2))
## SoundRecogASR
## Sound S SH
## SH 0.09 0.91
with(percsh[percsh$Lang == 'DE', ], round(prop.table(table(Sound, SoundRecogASR)), 2))
## SoundRecogASR
## Sound S SH
## SH 0.18 0.82
with(percsh[percsh$Lang == 'EN', ], round(prop.table(table(Sound, SoundRecogASR)), 2))
## SoundRecogASR
## Sound S SH
## SH 0.14 0.86
To replicate the analysis presented above, you can just copy the following lines to the most recent version of R. Please note that you first need to install Pandoc.
# Fetch the R Markdown source of this analysis, make sure rmarkdown is
# available, render the document and open the result in a browser.
download.file(
  url = 'http://www.let.rug.nl/wieling/ISSP2017/analysisISSP.Rmd',
  destfile = 'analysisISSP.Rmd'
)
if (!('rmarkdown' %in% rownames(installed.packages()))) {
  install.packages('rmarkdown')
}
library(rmarkdown)
render('analysisISSP.Rmd') # generates html file with results
browseURL(paste0('file://', file.path(getwd(), 'analysisISSP.html'))) # shows result