The present study uses electromagnetic articulography, through which the position of tongue and lips during speech is measured, for the study of dialect variation. By using generalized additive modeling to analyze the articulatory trajectories, we are able to reliably detect aggregate group differences, while simultaneously taking into account the individual variation of dozens of speakers. Our results show that two Dutch dialects show clear differences in their articulatory settings, with generally a more anterior tongue position in the dialect from Ubbergen in the southern half of the Netherlands than in the dialect of Ter Apel in the northern half of the Netherlands. A comparison with formant-based acoustic measurements further reveals that articulography is able to reveal interesting structural articulatory differences between dialects which are not visible when only focusing on the acoustic signal.
Journal: Revised version submitted (July, 2016) to Journal of Phonetics
Preprint: http://www.martijnwieling.nl/files/WielingEtAl-art.pdf
All source data: http://www.let.rug.nl/wieling/DiaArt/SourceData/
Keywords: Articulography; Dialectology; Generalized additive modeling; Articulatory setting
## Generated on: July 21, 2016 - 17:46:43
The following commands load the necessary functions and libraries and show the version information.
# install packages if not yet installed
packages <- c("mgcv","itsadug","lme4","parallel","MASS","reshape2")
if (length(setdiff(packages, rownames(installed.packages()))) > 0) {
install.packages(setdiff(packages, rownames(installed.packages())))
}
# load required packages
library(mgcv)
library(itsadug)
library(lme4)
library(parallel)
library(MASS)
library(reshape2)
# custom plotting functions
if (!file.exists('plotArt2D.R')) {
download.file('http://www.let.rug.nl/wieling/DiaArt/plotArt2D.R','plotArt2D.R')
}
source('plotArt2D.R')
# version information
R.version.string
## [1] "R version 3.3.1 (2016-06-21)"
cat(paste('mgcv version:',packageVersion('mgcv')))
## mgcv version: 1.8.12
cat(paste('itsadug version:',packageVersion('itsadug')))
## itsadug version: 2.2
The following shows the columns of the dataset and their explanation.
if (!file.exists('dat.rda')) {
download.file('http://www.let.rug.nl/wieling/DiaArt/dat.rda','dat.rda') # 77 MB
}
load('dat.rda')
dat = droplevels(dat[dat$YearBirth > 1990,],except=colnames(dat)[sapply(dat,is.ordered)]) # exclude older people
The dataset consists of 1734402 rows and 102 columns with the following column names:
colnames(dat)
## [1] "Speaker" "Group"
## [3] "IsTerApel" "Gender"
## [5] "YearBirth" "PlaceBirth"
## [7] "Word" "WordNr"
## [9] "Type" "Segment"
## [11] "SegmentNr" "Sensor"
## [13] "Axis" "SensorAxis"
## [15] "GroupTypeSensorAxis" "SpeakerSensorAxis"
## [17] "SpeakerTypeSensorAxis" "WordSensorAxis"
## [19] "WordGroupSensorAxis" "IsTA.T1.P"
## [21] "IsTA.T1.H" "IsTA.T2.P"
## [23] "IsTA.T2.H" "IsTA.T3.P"
## [25] "IsTA.T3.H" "IsCVC.T1.P"
## [27] "IsCVC.T1.H" "IsCVC.T2.P"
## [29] "IsCVC.T2.H" "IsCVC.T3.P"
## [31] "IsCVC.T3.H" "IsDia.T1.P"
## [33] "IsDia.T1.H" "IsDia.T2.P"
## [35] "IsDia.T2.H" "IsDia.T3.P"
## [37] "IsDia.T3.H" "IsTADia.T1.P"
## [39] "IsTADia.T1.H" "IsTADia.T2.P"
## [41] "IsTADia.T2.H" "IsTADia.T3.P"
## [43] "IsTADia.T3.H" "IsTACVC.T1.P"
## [45] "IsTACVC.T1.H" "IsTACVC.T2.P"
## [47] "IsTACVC.T2.H" "IsTACVC.T3.P"
## [49] "IsTACVC.T3.H" "IsTA.T1.PO"
## [51] "IsTA.T1.HO" "IsTA.T2.PO"
## [53] "IsTA.T2.HO" "IsTA.T3.PO"
## [55] "IsTA.T3.HO" "IsCVC.T1.PO"
## [57] "IsCVC.T1.HO" "IsCVC.T2.PO"
## [59] "IsCVC.T2.HO" "IsCVC.T3.PO"
## [61] "IsCVC.T3.HO" "IsDia.T1.PO"
## [63] "IsDia.T1.HO" "IsDia.T2.PO"
## [65] "IsDia.T2.HO" "IsDia.T3.PO"
## [67] "IsDia.T3.HO" "IsTACVC.T1.PO"
## [69] "IsTACVC.T1.HO" "IsTACVC.T2.PO"
## [71] "IsTACVC.T2.HO" "IsTACVC.T3.PO"
## [73] "IsTACVC.T3.HO" "IsTADia.T1.PO"
## [75] "IsTADia.T1.HO" "IsTADia.T2.PO"
## [77] "IsTADia.T2.HO" "IsTADia.T3.PO"
## [79] "IsTADia.T3.HO" "Word.start"
## [81] "Segment.start" "RecBlock"
## [83] "TimeInRecBlock" "Time.normWord"
## [85] "Time.normSegment" "Position.norm"
## [87] "RestPosition.norm" "RelPos.norm"
## [89] "Position.raw" "RestPosition.raw"
## [91] "RelPos.raw" "F1"
## [93] "F2" "F1.norm"
## [95] "F2.norm" "F1.man"
## [97] "F2.man" "F1.man.norm"
## [99] "F2.man.norm" "RPDistT1T2.raw"
## [101] "RPDistT2T3.raw" "RPDistT1T3.raw"
The following subsections show some descriptives for the dataset.
subj = unique(dat[,c("Speaker","Group","Gender","YearBirth","RPDistT1T2.raw","RPDistT2T3.raw","RPDistT1T3.raw")])
table(subj$Group,subj$Gender)
##
## F M
## TerApel 6 9
## Ubbergen 2 17
cat(paste('Average year of birth for Ter Apel speakers:',
round(mean(subj[subj$Group=='TerApel',]$YearBirth),2)))
## Average year of birth for Ter Apel speakers: 1996.6
cat(paste('Average year of birth for Ubbergen speakers:',
round(mean(subj[subj$Group=='Ubbergen',]$YearBirth),2)))
## Average year of birth for Ubbergen speakers: 1996.47
cat(paste('Average T1-T3 distance for Ter Apel speakers:',
round(mean(subj[subj$Group=='TerApel',]$RPDistT1T3.raw),1)))
## Average T1-T3 distance for Ter Apel speakers: 23.5
cat(paste('Average year of birth for Ubbergen speakers:',
round(mean(subj[subj$Group=='Ubbergen',]$RPDistT1T3.raw),1)))
## Average year of birth for Ubbergen speakers: 24.2
par(mfrow=c(1,3))
boxplot(RPDistT1T3.raw ~ Group, data=subj, main='Distance T1-T3 (mm.)')
boxplot(RPDistT1T2.raw ~ Group, data=subj, main='Distance T1-T2 (mm.)')
boxplot(RPDistT2T3.raw ~ Group, data=subj, main='Distance T2-T3 (mm.)')
wilcox.test(RPDistT1T3.raw ~ Group, data=subj)
##
## Wilcoxon rank sum test
##
## data: RPDistT1T3.raw by Group
## W = 117, p-value = 0.3908
## alternative hypothesis: true location shift is not equal to 0
The following graph shows the distribution of the sounds (categorized as front, center, back) for the dialect words per group.
# relative proportions
m = matrix(c(0.389,0.271,0.144,0.204,0.111,0.129,0.171,0.164,0.04,0.111,0.144,0.121),nrow=2,ncol=6,byrow=T)
dimnames(m) = list(c("Consonants","Vowels"),c("TA (front)","UB (front)","TA (center)","UB (center)","TA (back)","UB (back)"))
barplot(m, col='white', axes=F, axisnames=F, yaxp=c(0,1,2), las=1,ylim=c(0,0.6))
cols1=c('cadetblue3','tomato4','cadetblue3','tomato4','cadetblue3','tomato4')
cols2=c('cadetblue1','tomato','cadetblue1','tomato','cadetblue1','tomato')
# add coloured bars
for (i in 1:ncol(m)){
xx = m
xx[,-i] <- NA
colnames(xx)[-i] <- NA
barplot(xx,col=c(cols1[i],cols2[i]), add=T, axes=F)
}
legend('topright',c('Consonants (TA)','Vowels (TA)','Consonants (UB)','Vowels (UB)'),fill=c('cadetblue3','cadetblue1','tomato4','tomato'))
axis(2)
box()
In the following, several models are fitted for dialect words and CVC sequences (e.g., taat: [tat] and poop [pop]). For each word three models are fitted, the first to determine the rho value (for correcting autocorrelation in the residuals), the second model which corrects for the autocorrelation using the predetermined rho value. The third model is fit after model comparison has been conducted. Since the residuals were not completely adequate (some heteroscedasticity and non-normal distribution), we excluded the data for which the absolute standardized residuals of the model were greater than 2.5 SD (i.e. the data points for which the difference between the actual and fitted value was largest).
words = c('taarten','bogen','tol','kameel','taat','poop')
for (word in words) {
# select subset of data, but keep ordered factors the same as original, otherwise contrasts get reset
subdat = droplevels(dat[dat$Word == word,],except=colnames(dat)[sapply(dat,is.ordered)])
## Fit models with DV RelPos.norm
# fit first model to determine autocorrelation in residuals
model0.rel <- bam(RelPos.norm ~ s(Time.normWord,by=GroupTypeSensorAxis,k=20) +
GroupTypeSensorAxis + s(Time.normWord,SpeakerSensorAxis,bs='fs',m=1),
data=subdat, method='fREML', gc.level=2, discrete=T, nthreads=32)
# assess autocorrelation in residuals
model0acf.rel = acf(resid(model0.rel),plot=F)
rhoval = as.vector(model0acf.rel[1]$acf)
# fit model which corrects for autocorrelation in residuals
model.rel <- bam(RelPos.norm ~ s(Time.normWord,by=GroupTypeSensorAxis, k=20) +
GroupTypeSensorAxis + s(Time.normWord,SpeakerSensorAxis,bs='fs',m=1),
data=subdat, method='fREML', gc.level=2, AR.start=Word.start, rho=rhoval,
discrete=T, nthreads=32)
smry.rel <- summary(model.rel)
save(model.rel,smry.rel,file=paste(word,'-group-rel.rda',sep=''))
# Model criticism (using gam.check, not shown as it clutters the output) shows
# that there is residuals are not optimally distributed (some heteroscedasticity,
# and not completely normally distributed). Consequently, we apply model criticism
# and refit the models on the basis of that data. Approximately 2-2.5% of the data
# is excluded (this data is not fit adequately by the model, and excluding it will
# likely improve the fit for the rest of the data).
subdat2 <- droplevels(subdat[abs(scale(resid(model.rel))) < 2.5, ],
except=colnames(subdat)[sapply(subdat,is.ordered)])
model.rel <- bam(RelPos.norm ~ s(Time.normWord,by=GroupTypeSensorAxis, k=20) +
GroupTypeSensorAxis + s(Time.normWord,SpeakerSensorAxis,bs='fs',m=1),
data=subdat2, method='fREML', gc.level=2, AR.start=Word.start, rho=rhoval,
discrete=T, nthreads=32)
smry.rel <- summary(model.rel)
save(model.rel,smry.rel,file=paste(word,'-group-rel-mc.rda',sep=''))
## Fit models with DV Position.norm
# fit first model to determine autocorrelation in residuals
model0 <- bam(Position.norm ~ RestPosition.norm + s(Time.normWord,by=GroupTypeSensorAxis,k=20) +
GroupTypeSensorAxis + s(Time.normWord,SpeakerSensorAxis,bs='fs',m=1),
data=subdat, method='fREML', gc.level=2, discrete=T, nthreads=32)
# assess autocorrelation in residuals
model0acf = acf(resid(model0),plot=F)
rhoval = as.vector(model0acf[1]$acf)
# fit model which corrects for autocorrelation in residuals
model <- bam(Position.norm ~ RestPosition.norm + s(Time.normWord,by=GroupTypeSensorAxis, k=20) +
GroupTypeSensorAxis + s(Time.normWord,SpeakerSensorAxis,bs='fs',m=1),
data=subdat, method='fREML', gc.level=2, AR.start=Word.start, rho=rhoval,
discrete=T, nthreads=32)
smry <- summary(model)
save(model,smry,file=paste(word,'-group.rda',sep=''))
# Model criticism (see above)
subdat2 <- droplevels(subdat[abs(scale(resid(model))) < 2.5, ],
except=colnames(subdat)[sapply(subdat,is.ordered)])
model <- bam(Position.norm ~ RestPosition.norm + s(Time.normWord,by=GroupTypeSensorAxis, k=20) +
GroupTypeSensorAxis + s(Time.normWord,SpeakerSensorAxis,bs='fs',m=1),
data=subdat2, method='fREML', gc.level=2, AR.start=Word.start, rho=rhoval,
discrete=T, nthreads=32)
smry <- summary(model)
save(model,smry,file=paste(word,'-group-mc.rda',sep=''))
}
The visualization shows a two-dimensional representation of the fit of the model. As the model relative to the resting position yields values similar for the three sensors, this requires separate plots for each sensor. For the normalized position a single figure is sufficient, as it is normalized within the mouth (i.e. the T3 sensor is more back than T2, which is more back than T1).
par(mfrow=c(4,3))
for (word in c('taarten','bogen','tol','kameel')) {
if (!file.exists(paste(word,'-group-rel-mc.rda',sep=''))) {
download.file(paste('http://www.let.rug.nl/wieling/DiaArt/',word,'-group-rel-mc.rda',sep=''),
paste(word,'-group-rel-mc.rda',sep=''))
}
load(paste(word,'-group-rel-mc.rda',sep=''))
type = unique(dat[dat$Word==word,]$Type)
plotArt2D(model.rel, catvar='GroupTypeSensorAxis',
catlevels.x=c(paste('TerApel.',type,'.T1.P',sep=''),paste('Ubbergen.',type,'.T1.P',sep='')),
catlevels.y=c(paste('TerApel.',type,'.T1.H',sep=''),paste('Ubbergen.',type,'.T1.H',sep='')),
timevar='Time.normWord', collabels=c('Ter Apel', 'Ubbergen'),
main=paste('Rel. position of the T1 sensor: "',word,'"',sep=''),
xlab='Posterior position', ylab='Height', showPoints=T,
cols=list(c("cadetblue3","cadetblue1"),c("tomato4","tomato")),alpha=15,xlim=c(-1,1),ylim=c(-1,1))
plotArt2D(model.rel, catvar='GroupTypeSensorAxis',
catlevels.x=c(paste('TerApel.',type,'.T2.P',sep=''),paste('Ubbergen.',type,'.T2.P',sep='')),
catlevels.y=c(paste('TerApel.',type,'.T2.H',sep=''),paste('Ubbergen.',type,'.T2.H',sep='')),
timevar='Time.normWord', collabels=c('Ter Apel', 'Ubbergen'),
main=paste('Rel. position of the T2 sensor: "',word,'"',sep=''),
xlab='Posterior position', ylab='Height', showPoints=T,
cols=list(c("cadetblue3","cadetblue1"),c("tomato4","tomato")),alpha=15,xlim=c(-1,1),ylim=c(-1,1))
plotArt2D(model.rel, catvar='GroupTypeSensorAxis',
catlevels.x=c(paste('TerApel.',type,'.T3.P',sep=''),paste('Ubbergen.',type,'.T3.P',sep='')),
catlevels.y=c(paste('TerApel.',type,'.T3.H',sep=''),paste('Ubbergen.',type,'.T3.H',sep='')),
timevar='Time.normWord', collabels=c('Ter Apel', 'Ubbergen'),
main=paste('Rel. position of the T3 sensor: "',word,'"',sep=''),
xlab='Posterior position', ylab='Height', showPoints=T,
cols=list(c("cadetblue3","cadetblue1"),c("tomato4","tomato")),alpha=15,xlim=c(-1,1),ylim=c(-1,1))
}
par(mfrow=c(2,3))
for (word in c('taat','poop')) {
if (!file.exists(paste(word,'-group-rel-mc.rda',sep=''))) {
download.file(paste('http://www.let.rug.nl/wieling/DiaArt/',word,'-group-rel-mc.rda',sep=''),
paste(word,'-group-rel-mc.rda',sep=''))
}
load(paste(word,'-group-rel-mc.rda',sep=''))
if (word == 'taat') {
taatmodelrel = model.rel
}
type = unique(dat[dat$Word==word,]$Type)
plotArt2D(model.rel, catvar='GroupTypeSensorAxis',
catlevels.x=c(paste('TerApel.',type,'.T1.P',sep=''),paste('Ubbergen.',type,'.T1.P',sep='')),
catlevels.y=c(paste('TerApel.',type,'.T1.H',sep=''),paste('Ubbergen.',type,'.T1.H',sep='')),
timevar='Time.normWord', collabels=c('Ter Apel', 'Ubbergen'),
main=paste('Rel. position of the T1 sensor: "',word,'"',sep=''),
xlab='Posterior position', ylab='Height', showPoints=T,
cols=list(c("cadetblue3","cadetblue1"),c("tomato4","tomato")),alpha=15,xlim=c(-1,1),ylim=c(-1,1))
plotArt2D(model.rel, catvar='GroupTypeSensorAxis',
catlevels.x=c(paste('TerApel.',type,'.T2.P',sep=''),paste('Ubbergen.',type,'.T2.P',sep='')),
catlevels.y=c(paste('TerApel.',type,'.T2.H',sep=''),paste('Ubbergen.',type,'.T2.H',sep='')),
timevar='Time.normWord', collabels=c('Ter Apel', 'Ubbergen'),
main=paste('Rel. position of the T2 sensor: "',word,'"',sep=''),
xlab='Posterior position', ylab='Height', showPoints=T,
cols=list(c("cadetblue3","cadetblue1"),c("tomato4","tomato")),alpha=15,xlim=c(-1,1),ylim=c(-1,1))
plotArt2D(model.rel, catvar='GroupTypeSensorAxis',
catlevels.x=c(paste('TerApel.',type,'.T3.P',sep=''),paste('Ubbergen.',type,'.T3.P',sep='')),
catlevels.y=c(paste('TerApel.',type,'.T3.H',sep=''),paste('Ubbergen.',type,'.T3.H',sep='')),
timevar='Time.normWord', collabels=c('Ter Apel', 'Ubbergen'),
main=paste('Rel. position of the T3 sensor: "',word,'"',sep=''),
xlab='Posterior position', ylab='Height', showPoints=T,
cols=list(c("cadetblue3","cadetblue1"),c("tomato4","tomato")),alpha=15,xlim=c(-1,1),ylim=c(-1,1))
}
par(mfrow=c(length(words)/2,2))
for (word in words) {
if (!file.exists(paste(word,'-group.rda',sep=''))) {
download.file(paste('http://www.let.rug.nl/wieling/DiaArt/',word,'-group.rda',sep=''),
paste(word,'-group.rda',sep=''))
}
load(paste(word,'-group.rda',sep=''))
type = unique(dat[dat$Word==word,]$Type)
plotArt2D(model, catvar='GroupTypeSensorAxis',
catlevels.x=c(paste('TerApel.',type,'.T3.P',sep=''),paste('Ubbergen.',type,'.T3.P',sep=''),
paste('TerApel.',type,'.T2.P',sep=''),paste('Ubbergen.',type,'.T2.P',sep=''),
paste('TerApel.',type,'.T1.P',sep=''),paste('Ubbergen.',type,'.T1.P',sep='')),
catlevels.y=c(paste('TerApel.',type,'.T3.H',sep=''),paste('Ubbergen.',type,'.T3.H',sep=''),
paste('TerApel.',type,'.T2.H',sep=''),paste('Ubbergen.',type,'.T2.H',sep=''),
paste('TerApel.',type,'.T1.H',sep=''),paste('Ubbergen.',type,'.T1.H',sep='')),
timevar='Time.normWord', collabels=c('Ter Apel', 'Ubbergen'),
main=paste('Position of the three tongue sensors: "',word,'"',sep=''),
xlab='Posterior position', ylab='Height', showPoints=T,
cols=list(c("cadetblue3","cadetblue1"),c("tomato4","tomato")),alpha=15)
}
The following subsections show the formal assessment of the differences between the two dialect regions for the word “taat”. First by visualization, then by using binary difference curves, and finally by ordered factors.
The following plots provide a visual impression of the differences.
par(mfrow=c(2,2))
plotSmooths(taatmodelrel,xvar='Time.normWord',catvar='GroupTypeSensorAxis',
catlevels=c('TerApel.Standard.T1.P','Ubbergen.Standard.T1.P'),
dropRanef='SpeakerSensorAxis',ylim=c(-0.5,1),legendPos='topleft',
main='T1 posterior position (relative): "taat"',xlab='Time (normalized)',
ylab='Posterior position',legendlabels=c('Ter Apel','Ubbergen'),
legendtitle='Group', colors=c('cadetblue3','tomato4'),cexPoints=1,
alphaPoints=0.04,showPoints=T)
plotSmooths(taatmodelrel,xvar='Time.normWord',catvar='GroupTypeSensorAxis',
catlevels=c('TerApel.Standard.T1.H','Ubbergen.Standard.T1.H'),
dropRanef='SpeakerSensorAxis',ylim=c(-0.5,1),legendPos='topleft',
main='T1 height (relative): "taat"',xlab='Time (normalized)',
ylab='Height',legendlabels=c('Ter Apel','Ubbergen'),
legendtitle='Group',colors=c('cadetblue3','tomato4'),cexPoints=1,
alphaPoints=0.04,showPoints=T)
plot_diff(taatmodelrel, view='Time.normWord', comp=list(GroupTypeSensorAxis =
c("TerApel.Standard.T1.P","Ubbergen.Standard.T1.P")), rm.ranef=T, print.summary=F,
main='T1 posterior position (rel.): Ter Apel vs. Ubbergen', xlab='Time (normalized)',
ylab='Posterior position difference', ylim=c(-0.325,0.325)); box()
plot_diff(taatmodelrel, view='Time.normWord', comp=list(GroupTypeSensorAxis =
c("TerApel.Standard.T1.H","Ubbergen.Standard.T1.H")), rm.ranef=T, print.summary=F,
main='T1 height (rel.): Ter Apel vs. Ubbergen', xlab='Time (normalized)',
ylab='Height difference', ylim=c(-0.325,0.325)); box()
load('taat-group.rda')
taatmodel = model
par(mfrow=c(2,2))
plotSmooths(taatmodel,xvar='Time.normWord',catvar='GroupTypeSensorAxis',
catlevels=c('TerApel.Standard.T1.P','Ubbergen.Standard.T1.P'),
dropRanef='SpeakerSensorAxis',ylim=c(0.05,1),legendPos='topleft',
main='T1 posterior position: "taat"',xlab='Time (normalized)',
ylab='Posterior position',legendlabels=c('Ter Apel','Ubbergen'),
legendtitle='Group', colors=c('cadetblue3','tomato4'),cexPoints=1,
alphaPoints=0.04,showPoints=T)
plotSmooths(taatmodel,xvar='Time.normWord',catvar='GroupTypeSensorAxis',
catlevels=c('TerApel.Standard.T1.H','Ubbergen.Standard.T1.H'),
dropRanef='SpeakerSensorAxis',ylim=c(0.05,1),legendPos='topleft',
main='T1 height: "taat"',xlab='Time (normalized)',
ylab='Height',legendlabels=c('Ter Apel','Ubbergen'),
legendtitle='Group',colors=c('cadetblue3','tomato4'),cexPoints=1,
alphaPoints=0.04,showPoints=T)
plot_diff(taatmodel, view='Time.normWord', comp=list(GroupTypeSensorAxis =
c("TerApel.Standard.T1.P","Ubbergen.Standard.T1.P")), rm.ranef=T, print.summary=F,
main='T1 posterior position: Ter Apel vs. Ubbergen', xlab='Time (normalized)',
ylab='Posterior position difference', ylim=c(-0.325,0.325)); box()
plot_diff(taatmodel, view='Time.normWord', comp=list(GroupTypeSensorAxis =
c("TerApel.Standard.T1.H","Ubbergen.Standard.T1.H")), rm.ranef=T, print.summary=F,
main='T1 height: Ter Apel vs. Ubbergen', xlab='Time (normalized)',
ylab='Height difference', ylim=c(-0.325,0.325)); box()
We can use ordered factors to formally test if there are intercept and non-linear differences and assess the significance of each of them separately.
subdat = droplevels(dat[dat$Word == 'taat',],except=colnames(dat)[sapply(dat,is.ordered)])
# fit first model to determine autocorrelation in residuals
model0 <- bam(RelPos.norm ~ s(Time.normWord, by=SensorAxis,k=20) + SensorAxis +
s(Time.normWord, by=IsTA.T1.PO,k=20) + IsTA.T1.PO +
s(Time.normWord, by=IsTA.T1.HO,k=20) + IsTA.T1.HO +
s(Time.normWord, by=IsTA.T2.PO,k=20) + IsTA.T2.PO +
s(Time.normWord, by=IsTA.T2.HO,k=20) + IsTA.T2.HO +
s(Time.normWord, by=IsTA.T3.PO,k=20) + IsTA.T3.PO +
s(Time.normWord, by=IsTA.T3.HO,k=20) + IsTA.T3.HO +
s(Time.normWord,SpeakerSensorAxis,bs='fs',m=1), data=subdat,
method='fREML', discrete=T)
# assess autocorrelation in residuals
model0acf = acf(resid(model0),plot=F)
rhoval = as.vector(model0acf[1]$acf)
# fit model which corrects for autocorrelation in residuals
modelof.rel <- bam(RelPos.norm ~ s(Time.normWord, by=SensorAxis,k=20) + SensorAxis +
s(Time.normWord, by=IsTA.T1.PO,k=20) + IsTA.T1.PO +
s(Time.normWord, by=IsTA.T1.HO,k=20) + IsTA.T1.HO +
s(Time.normWord, by=IsTA.T2.PO,k=20) + IsTA.T2.PO +
s(Time.normWord, by=IsTA.T2.HO,k=20) + IsTA.T2.HO +
s(Time.normWord, by=IsTA.T3.PO,k=20) + IsTA.T3.PO +
s(Time.normWord, by=IsTA.T3.HO,k=20) + IsTA.T3.HO +
s(Time.normWord,SpeakerSensorAxis,bs='fs',m=1), data=subdat, method='fREML',
gc.level=2, AR.start=Word.start, rho=rhoval, discrete=T)
smryof.rel <- summary(modelof.rel)
save(modelof.rel,smryof.rel,file='taat-of-rel.rda')
# Model criticism (see above)
subdat2 <- droplevels(subdat[abs(scale(resid(modelof.rel))) < 2.5, ],
except=colnames(subdat)[sapply(subdat,is.ordered)])
modelof.rel <- bam(RelPos.norm ~ s(Time.normWord, by=SensorAxis,k=20) + SensorAxis +
s(Time.normWord, by=IsTA.T1.PO,k=20) + IsTA.T1.PO +
s(Time.normWord, by=IsTA.T1.HO,k=20) + IsTA.T1.HO +
s(Time.normWord, by=IsTA.T2.PO,k=20) + IsTA.T2.PO +
s(Time.normWord, by=IsTA.T2.HO,k=20) + IsTA.T2.HO +
s(Time.normWord, by=IsTA.T3.PO,k=20) + IsTA.T3.PO +
s(Time.normWord, by=IsTA.T3.HO,k=20) + IsTA.T3.HO +
s(Time.normWord,SpeakerSensorAxis,bs='fs',m=1), data=subdat2, method='fREML',
gc.level=2, AR.start=Word.start, rho=rhoval, discrete=T)
smryof.rel <- summary(modelof.rel)
save(modelof.rel,smryof.rel,file='taat-of-rel-mc.rda')
if (!file.exists('taat-of-rel-mc.rda')) {
download.file('http://www.let.rug.nl/wieling/DiaArt/taat-of-rel-mc.rda',
'taat-of-rel-mc.rda')
}
load('taat-of-rel-mc.rda')
smryof.rel # show summary
##
## Family: gaussian
## Link function: identity
##
## Formula:
## RelPos.norm ~ s(Time.normWord, by = SensorAxis, k = 20) + SensorAxis +
## s(Time.normWord, by = IsTA.T1.PO, k = 20) + IsTA.T1.PO +
## s(Time.normWord, by = IsTA.T1.HO, k = 20) + IsTA.T1.HO +
## s(Time.normWord, by = IsTA.T2.PO, k = 20) + IsTA.T2.PO +
## s(Time.normWord, by = IsTA.T2.HO, k = 20) + IsTA.T2.HO +
## s(Time.normWord, by = IsTA.T3.PO, k = 20) + IsTA.T3.PO +
## s(Time.normWord, by = IsTA.T3.HO, k = 20) + IsTA.T3.HO +
## s(Time.normWord, SpeakerSensorAxis, bs = "fs", m = 1)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.02206 0.03896 0.566 0.57121
## SensorAxisT2.P 0.03785 0.05511 0.687 0.49228
## SensorAxisT1.P 0.02112 0.05512 0.383 0.70168
## SensorAxisT3.H -0.34506 0.05507 -6.266 3.81e-10 ***
## SensorAxisT2.H -0.26100 0.05513 -4.734 2.22e-06 ***
## SensorAxisT1.H -0.16849 0.05513 -3.056 0.00224 **
## IsTA.T1.PO1 0.14920 0.05864 2.544 0.01096 *
## IsTA.T1.HO1 0.06325 0.05864 1.079 0.28081
## IsTA.T2.PO1 0.17742 0.05862 3.027 0.00248 **
## IsTA.T2.HO1 0.06615 0.05866 1.128 0.25947
## IsTA.T3.PO1 0.16851 0.05856 2.878 0.00401 **
## IsTA.T3.HO1 0.03586 0.05850 0.613 0.53991
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(Time.normWord):SensorAxisT3.P 15.383 17.242 13.074 < 2e-16 ***
## s(Time.normWord):SensorAxisT2.P 15.561 17.339 15.563 < 2e-16 ***
## s(Time.normWord):SensorAxisT1.P 16.007 17.605 16.708 < 2e-16 ***
## s(Time.normWord):SensorAxisT3.H 12.784 15.238 13.827 < 2e-16 ***
## s(Time.normWord):SensorAxisT2.H 16.271 17.748 32.754 < 2e-16 ***
## s(Time.normWord):SensorAxisT1.H 17.141 18.240 37.535 < 2e-16 ***
## s(Time.normWord):IsTA.T1.PO1 8.761 11.047 2.405 0.00582 **
## s(Time.normWord):IsTA.T1.HO1 8.813 11.152 1.704 0.06268 .
## s(Time.normWord):IsTA.T2.PO1 7.971 10.090 2.321 0.00979 **
## s(Time.normWord):IsTA.T2.HO1 9.848 12.286 4.542 1.81e-07 ***
## s(Time.normWord):IsTA.T3.PO1 6.855 8.722 2.022 0.03539 *
## s(Time.normWord):IsTA.T3.HO1 5.978 7.575 1.694 0.09952 .
## s(Time.normWord,SpeakerSensorAxis) 1445.737 1835.000 15.892 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.965 Deviance explained = 96.8%
## fREML = -44621 Scale est. = 0.0016276 n = 17639
# show model criticism plots, supress textual output
sink("/dev/null"); gam.check(modelof.rel); sink()
# fit first model to determine autocorrelation in residuals
model0 <- bam(Position.norm ~ RestPosition.norm + s(Time.normWord, by=SensorAxis,k=20) + SensorAxis +
s(Time.normWord, by=IsTA.T1.PO,k=20) + IsTA.T1.PO +
s(Time.normWord, by=IsTA.T1.HO,k=20) + IsTA.T1.HO +
s(Time.normWord, by=IsTA.T2.PO,k=20) + IsTA.T2.PO +
s(Time.normWord, by=IsTA.T2.HO,k=20) + IsTA.T2.HO +
s(Time.normWord, by=IsTA.T3.PO,k=20) + IsTA.T3.PO +
s(Time.normWord, by=IsTA.T3.HO,k=20) + IsTA.T3.HO +
s(Time.normWord,SpeakerSensorAxis,bs='fs',m=1), data=subdat,
method='fREML', discrete=T)
# assess autocorrelation in residuals
model0acf = acf(resid(model0),plot=F)
rhoval = as.vector(model0acf[1]$acf)
# fit model which corrects for autocorrelation in residuals
modelof <- bam(Position.norm ~ RestPosition.norm + s(Time.normWord, by=SensorAxis,k=20) + SensorAxis +
s(Time.normWord, by=IsTA.T1.PO,k=20) + IsTA.T1.PO +
s(Time.normWord, by=IsTA.T1.HO,k=20) + IsTA.T1.HO +
s(Time.normWord, by=IsTA.T2.PO,k=20) + IsTA.T2.PO +
s(Time.normWord, by=IsTA.T2.HO,k=20) + IsTA.T2.HO +
s(Time.normWord, by=IsTA.T3.PO,k=20) + IsTA.T3.PO +
s(Time.normWord, by=IsTA.T3.HO,k=20) + IsTA.T3.HO +
s(Time.normWord,SpeakerSensorAxis,bs='fs',m=1), data=subdat, method='fREML',
gc.level=2, AR.start=Word.start, rho=rhoval, discrete=T)
smryof <- summary(modelof)
save(modelof,smryof,file='taat-of.rda')
# Model criticism (see above)
subdat2 <- droplevels(subdat[abs(scale(resid(modelof.rel))) < 2.5, ],
except=colnames(subdat)[sapply(subdat,is.ordered)])
modelof <- bam(Position.norm ~ RestPosition.norm + s(Time.normWord, by=SensorAxis,k=20) + SensorAxis +
s(Time.normWord, by=IsTA.T1.PO,k=20) + IsTA.T1.PO +
s(Time.normWord, by=IsTA.T1.HO,k=20) + IsTA.T1.HO +
s(Time.normWord, by=IsTA.T2.PO,k=20) + IsTA.T2.PO +
s(Time.normWord, by=IsTA.T2.HO,k=20) + IsTA.T2.HO +
s(Time.normWord, by=IsTA.T3.PO,k=20) + IsTA.T3.PO +
s(Time.normWord, by=IsTA.T3.HO,k=20) + IsTA.T3.HO +
s(Time.normWord,SpeakerSensorAxis,bs='fs',m=1), data=subdat2, method='fREML',
gc.level=2, AR.start=Word.start, rho=rhoval, discrete=T)
smryof <- summary(modelof)
save(modelof,smryof,file='taat-of-mc.rda')
if (!file.exists('taat-of-mc.rda')) {
download.file('http://www.let.rug.nl/wieling/DiaArt/taat-of-mc.rda',
'taat-of-mc.rda')
}
load('taat-of-mc.rda')
smryof # show summary
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Position.norm ~ RestPosition.norm + s(Time.normWord, by = SensorAxis,
## k = 20) + SensorAxis + s(Time.normWord, by = IsTA.T1.PO,
## k = 20) + IsTA.T1.PO + s(Time.normWord, by = IsTA.T1.HO,
## k = 20) + IsTA.T1.HO + s(Time.normWord, by = IsTA.T2.PO,
## k = 20) + IsTA.T2.PO + s(Time.normWord, by = IsTA.T2.HO,
## k = 20) + IsTA.T2.HO + s(Time.normWord, by = IsTA.T3.PO,
## k = 20) + IsTA.T3.PO + s(Time.normWord, by = IsTA.T3.HO,
## k = 20) + IsTA.T3.HO + s(Time.normWord, SpeakerSensorAxis,
## bs = "fs", m = 1)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.522748 0.033251 15.722 < 2e-16 ***
## RestPosition.norm 0.197127 0.045055 4.375 1.22e-05 ***
## SensorAxisT2.P -0.195558 0.026460 -7.391 1.53e-13 ***
## SensorAxisT1.P -0.387222 0.032512 -11.910 < 2e-16 ***
## SensorAxisT3.H -0.085159 0.025422 -3.350 0.000810 ***
## SensorAxisT2.H -0.116492 0.023934 -4.867 1.14e-06 ***
## SensorAxisT1.H -0.188522 0.023897 -7.889 3.24e-15 ***
## IsTA.T1.PO1 0.082926 0.025042 3.311 0.000930 ***
## IsTA.T1.HO1 -0.000634 0.025351 -0.025 0.980048
## IsTA.T2.PO1 0.093312 0.024993 3.734 0.000189 ***
## IsTA.T2.HO1 0.025229 0.025213 1.001 0.317025
## IsTA.T3.PO1 0.070939 0.024896 2.849 0.004385 **
## IsTA.T3.HO1 0.016461 0.025102 0.656 0.511969
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(Time.normWord):SensorAxisT3.P 13.119 15.696 7.766 < 2e-16 ***
## s(Time.normWord):SensorAxisT2.P 13.568 16.032 9.372 < 2e-16 ***
## s(Time.normWord):SensorAxisT1.P 14.750 16.931 11.425 < 2e-16 ***
## s(Time.normWord):SensorAxisT3.H 13.200 15.633 17.215 < 2e-16 ***
## s(Time.normWord):SensorAxisT2.H 16.237 17.788 37.115 < 2e-16 ***
## s(Time.normWord):SensorAxisT1.H 17.305 18.384 48.388 < 2e-16 ***
## s(Time.normWord):IsTA.T1.PO1 5.525 7.005 1.380 0.20869
## s(Time.normWord):IsTA.T1.HO1 7.917 10.115 2.394 0.00834 **
## s(Time.normWord):IsTA.T2.PO1 4.943 6.227 1.661 0.12307
## s(Time.normWord):IsTA.T2.HO1 10.026 12.521 5.056 1.07e-08 ***
## s(Time.normWord):IsTA.T3.PO1 3.873 4.816 1.179 0.30566
## s(Time.normWord):IsTA.T3.HO1 7.032 8.912 2.659 0.00468 **
## s(Time.normWord,SpeakerSensorAxis) 1368.205 1823.000 5.886 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.964 Deviance explained = 96.7%
## fREML = -49814 Scale est. = 0.00096409 n = 17790
# show model criticism plots, supress textual output
sink("/dev/null"); gam.check(modelof); sink()
In the following the aggregate model is fitted. Four types of models are fitted: the first model is used to determine the rho value (for correcting autocorrelation in the residuals), the second model fits separate trajectories for each level (this model is only used for some visualizations), the third model fits the model using difference curves. The model with difference curves yields p-values (separately for the constant intercept difference and the non-linear difference) assessing if the difference between the two speaker groups and the two types of words (dialect words vs. CVC sequences) is significant or not. The fourth model attempts to simplify the ordered-factor model by excluding fixed-effect factors and/or SFs which were not significant and whose inclusion was not warranted (using AIC comparisons).
Two variants of visualization-model and the simplified ordered factor models are fitted. As the residuals of these models were not completely adequate (some heteroscedasticity and non-normal distribution), we excluded the data for which the absolute standardized residuals of the model were greater than 2.5 SD (i.e. the data points for which the difference between the actual and fitted value was greatest). The visualization and model summaries shown are based on these models (though note that the results are highly similar to the original models).
Here the models are fitted using the position relative to the non-speech resting position. The models are saved as each model takes about 7 hours to fit using 32 CPUs.
# fit first model to determine autocorrelation in residuals
system.time(modelNoRho.rel <- bam(RelPos.norm ~ s(Time.normWord,by=GroupTypeSensorAxis, k=20) +
GroupTypeSensorAxis + s(Time.normWord,SpeakerTypeSensorAxis,bs='fs',m=1) +
s(Time.normWord,WordGroupSensorAxis,bs='fs',m=1), data=dat,
method='fREML', discrete=T, nthreads=32))
save(modelNoRho.rel,file='modelALL-rel-group-norho-1.8.12.rda')
# assess autocorrelation in residuals
modelACF.rel = acf(resid(modelNoRho.rel),plot=F)
rhoval = as.vector(modelACF.rel[1]$acf)
# fit model which corrects for autocorrelation in residuals
system.time(model.rel <- bam(RelPos.norm ~ s(Time.normWord,by=GroupTypeSensorAxis, k=20) +
GroupTypeSensorAxis + s(Time.normWord,SpeakerTypeSensorAxis,bs='fs',m=1) +
s(Time.normWord,WordGroupSensorAxis,bs='fs',m=1), data=dat,
method='fREML', AR.start=Word.start, rho=rhoval, discrete=T, nthreads=32))
system.time(smry.rel <- summary(model.rel))
save(model.rel,smry.rel,file='modelALL-rel-group-1.8.12.rda')
# model criticism
dat2 <- droplevels(dat[abs(scale(resid(model.rel))) < 2.5, ],
except=colnames(dat)[sapply(dat,is.ordered)])
# model after model criticism
system.time(model.rel <- bam(RelPos.norm ~ s(Time.normWord,by=GroupTypeSensorAxis, k=20) +
GroupTypeSensorAxis + s(Time.normWord,SpeakerTypeSensorAxis,bs='fs',m=1) +
s(Time.normWord,WordGroupSensorAxis,bs='fs',m=1), data=dat2,
method='fREML', AR.start=Word.start, rho=rhoval, discrete=T, nthreads=32))
system.time(smry.rel <- summary(model.rel))
save(model.rel,smry.rel,file='modelALL-rel-group-mc-1.8.12.rda')
# fit model with ordered factor difference curve
system.time(modelof.rel <- bam(RelPos.norm ~ s(Time.normWord, by=SensorAxis,k=20) + SensorAxis +
s(Time.normWord, by=IsTA.T1.PO,k=20) + IsTA.T1.PO + s(Time.normWord, by=IsTA.T1.HO,k=20) + IsTA.T1.HO +
s(Time.normWord, by=IsTA.T2.PO,k=20) + IsTA.T2.PO + s(Time.normWord, by=IsTA.T2.HO,k=20) + IsTA.T2.HO +
s(Time.normWord, by=IsTA.T3.PO,k=20) + IsTA.T3.PO + s(Time.normWord, by=IsTA.T3.HO,k=20) + IsTA.T3.HO +
s(Time.normWord, by=IsCVC.T1.PO,k=20) + IsCVC.T1.PO + s(Time.normWord, by=IsCVC.T1.HO,k=20) + IsCVC.T1.HO +
s(Time.normWord, by=IsCVC.T2.PO,k=20) + IsCVC.T2.PO + s(Time.normWord, by=IsCVC.T2.HO,k=20) + IsCVC.T2.HO +
s(Time.normWord, by=IsCVC.T3.PO,k=20) + IsCVC.T3.PO + s(Time.normWord, by=IsCVC.T3.HO,k=20) + IsCVC.T3.HO +
s(Time.normWord, by=IsTACVC.T1.PO,k=20) + IsTACVC.T1.PO +
s(Time.normWord, by=IsTACVC.T1.HO,k=20) + IsTACVC.T1.HO +
s(Time.normWord, by=IsTACVC.T2.PO,k=20) + IsTACVC.T2.PO +
s(Time.normWord, by=IsTACVC.T2.HO,k=20) + IsTACVC.T2.HO +
s(Time.normWord, by=IsTACVC.T3.PO,k=20) + IsTACVC.T3.PO +
s(Time.normWord, by=IsTACVC.T3.HO,k=20) + IsTACVC.T3.HO +
s(Time.normWord,SpeakerTypeSensorAxis,bs='fs',m=1) +
s(Time.normWord,WordGroupSensorAxis,bs='fs',m=1), data=dat, method='fREML',
AR.start=Word.start, rho=rhoval, discrete=T, nthreads=32))
system.time(smryof.rel <- summary(modelof.rel))
save(modelof.rel,smryof.rel,file='modelALL-rel-of-1.8.12.rda')
# The results of the previous model show that only the IsTA fixed effects and IsCVC smooths need to be included
# Model comparison confirms this as the AIC value of the most complex model is only 0.34 AIC units
# lower than the simplest model fit here. The AIC difference threshold of a more complex model we use
# is 2, so we retain the simplest model.
system.time(modelof.rel <- bam(RelPos.norm ~ s(Time.normWord, by=SensorAxis,k=20) + SensorAxis +
IsTA.T1.PO + IsTA.T1.HO + IsTA.T2.PO + IsTA.T2.HO + IsTA.T3.PO + IsTA.T3.HO +
s(Time.normWord, by=IsCVC.T1.PO,k=20) + s(Time.normWord, by=IsCVC.T1.HO,k=20) +
s(Time.normWord, by=IsCVC.T2.PO,k=20) + s(Time.normWord, by=IsCVC.T2.HO,k=20) +
s(Time.normWord, by=IsCVC.T3.PO,k=20) + s(Time.normWord, by=IsCVC.T3.HO,k=20) +
s(Time.normWord,SpeakerTypeSensorAxis,bs='fs',m=1) +
s(Time.normWord,WordGroupSensorAxis,bs='fs',m=1), data=dat, method='fREML',
AR.start=Word.start, rho=rhoval, discrete=T, nthreads=32))
system.time(smryof.rel <- summary(modelof.rel))
save(modelof.rel,smryof.rel,file='modelALL-rel-of-simplest-1.8.12.rda')
# model criticism
dat2 <- droplevels(dat[abs(scale(resid(modelof.rel))) < 2.5, ],
except=colnames(dat)[sapply(dat,is.ordered)])
system.time(modelof.rel <- bam(RelPos.norm ~ s(Time.normWord, by=SensorAxis,k=20) + SensorAxis +
IsTA.T1.PO + IsTA.T1.HO + IsTA.T2.PO + IsTA.T2.HO + IsTA.T3.PO + IsTA.T3.HO +
s(Time.normWord, by=IsCVC.T1.PO,k=20) + s(Time.normWord, by=IsCVC.T1.HO,k=20) +
s(Time.normWord, by=IsCVC.T2.PO,k=20) + s(Time.normWord, by=IsCVC.T2.HO,k=20) +
s(Time.normWord, by=IsCVC.T3.PO,k=20) + s(Time.normWord, by=IsCVC.T3.HO,k=20) +
s(Time.normWord,SpeakerTypeSensorAxis,bs='fs',m=1) +
s(Time.normWord,WordGroupSensorAxis,bs='fs',m=1), data=dat, method='fREML',
AR.start=Word.start, rho=rhoval, discrete=T, nthreads=32))
system.time(smryof.rel <- summary(modelof.rel))
save(modelof.rel,smryof.rel,file='modelALL-rel-of-simplest-mc-1.8.12.rda')
Here the models are fitted using the normalized position. The models are saved as each model takes about 7 hours to fit using 32 CPUs.
# fit first model to determine autocorrelation in residuals
system.time(modelNoRho <- bam(Position.norm ~ RestPosition.norm +
s(Time.normWord,by=GroupTypeSensorAxis, k=20) +
GroupTypeSensorAxis + s(Time.normWord,SpeakerTypeSensorAxis,bs='fs',m=1) +
s(Time.normWord,WordGroupSensorAxis,bs='fs',m=1), data=dat,
method='fREML', discrete=T, nthreads=32))
save(modelNoRho,file='modelALL-group-norho-1.8.12.rda')
# assess autocorrelation in residuals
modelACF = acf(resid(modelNoRho),plot=F)
rhoval = as.vector(modelACF[1]$acf)
# fit model which corrects for autocorrelation in residuals
system.time(model <- bam(Position.norm ~ RestPosition.norm +
s(Time.normWord,by=GroupTypeSensorAxis, k=20) +
GroupTypeSensorAxis + s(Time.normWord,SpeakerTypeSensorAxis,bs='fs',m=1) +
s(Time.normWord,WordGroupSensorAxis,bs='fs',m=1), data=dat,
method='fREML', AR.start=Word.start, rho=rhoval, discrete=T, nthreads=32))
system.time(smry <- summary(model))
save(model,smry,file='modelALL-group-1.8.12.rda')
# model criticism
dat2 <- droplevels(dat[abs(scale(resid(model))) < 2.5, ],
except=colnames(dat)[sapply(dat,is.ordered)])
# fit model which corrects for autocorrelation in residuals
system.time(model <- bam(Position.norm ~ RestPosition.norm +
s(Time.normWord,by=GroupTypeSensorAxis, k=20) +
GroupTypeSensorAxis + s(Time.normWord,SpeakerTypeSensorAxis,bs='fs',m=1) +
s(Time.normWord,WordGroupSensorAxis,bs='fs',m=1), data=dat2,
method='fREML', AR.start=Word.start, rho=rhoval, discrete=T, nthreads=32))
system.time(smry <- summary(model))
save(model,smry,file='modelALL-group-mc-1.8.12.rda')
# fit model with ordered factor difference curve
system.time(modelof <- bam(Position.norm ~ RestPosition.norm + s(Time.normWord, by=SensorAxis,k=20) +
SensorAxis + s(Time.normWord, by=IsTA.T1.PO,k=20) + IsTA.T1.PO +
s(Time.normWord, by=IsTA.T1.HO,k=20) + IsTA.T1.HO +
s(Time.normWord, by=IsTA.T2.PO,k=20) + IsTA.T2.PO +
s(Time.normWord, by=IsTA.T2.HO,k=20) + IsTA.T2.HO +
s(Time.normWord, by=IsTA.T3.PO,k=20) + IsTA.T3.PO +
s(Time.normWord, by=IsTA.T3.HO,k=20) + IsTA.T3.HO +
s(Time.normWord, by=IsCVC.T1.PO,k=20) + IsCVC.T1.PO +
s(Time.normWord, by=IsCVC.T1.HO,k=20) + IsCVC.T1.HO +
s(Time.normWord, by=IsCVC.T2.PO,k=20) + IsCVC.T2.PO +
s(Time.normWord, by=IsCVC.T2.HO,k=20) + IsCVC.T2.HO +
s(Time.normWord, by=IsCVC.T3.PO,k=20) + IsCVC.T3.PO +
s(Time.normWord, by=IsCVC.T3.HO,k=20) + IsCVC.T3.HO +
s(Time.normWord, by=IsTACVC.T1.PO,k=20) + IsTACVC.T1.PO +
s(Time.normWord, by=IsTACVC.T1.HO,k=20) + IsTACVC.T1.HO +
s(Time.normWord, by=IsTACVC.T2.PO,k=20) + IsTACVC.T2.PO +
s(Time.normWord, by=IsTACVC.T2.HO,k=20) + IsTACVC.T2.HO +
s(Time.normWord, by=IsTACVC.T3.PO,k=20) + IsTACVC.T3.PO +
s(Time.normWord, by=IsTACVC.T3.HO,k=20) + IsTACVC.T3.HO +
s(Time.normWord,SpeakerTypeSensorAxis,bs='fs',m=1) +
s(Time.normWord,WordGroupSensorAxis,bs='fs',m=1), data=dat, method='fREML',
AR.start=Word.start, rho=rhoval, discrete=T, nthreads=32))
system.time(smryof <- summary(modelof))
save(modelof,smryof,file='modelALL-of-1.8.12.rda')
# The results of the previous model show that only the IsTA fixed effects need to be included
# Model comparison confirms this as the AIC value of the most complex model is not lower than
# the simpler model. Note that the IsTA and IsTACVC smooths are necessary still.
system.time(modelof <- bam(Position.norm ~ RestPosition.norm + s(Time.normWord, by=SensorAxis,k=20) +
SensorAxis + s(Time.normWord, by=IsTA.T1.PO,k=20) + IsTA.T1.PO +
s(Time.normWord, by=IsTA.T1.HO,k=20) + IsTA.T1.HO +
s(Time.normWord, by=IsTA.T2.PO,k=20) + IsTA.T2.PO +
s(Time.normWord, by=IsTA.T2.HO,k=20) + IsTA.T2.HO +
s(Time.normWord, by=IsTA.T3.PO,k=20) + IsTA.T3.PO +
s(Time.normWord, by=IsTA.T3.HO,k=20) + IsTA.T3.HO +
s(Time.normWord, by=IsCVC.T1.PO,k=20) +
s(Time.normWord, by=IsCVC.T1.HO,k=20) +
s(Time.normWord, by=IsCVC.T2.PO,k=20) +
s(Time.normWord, by=IsCVC.T2.HO,k=20) +
s(Time.normWord, by=IsCVC.T3.PO,k=20) +
s(Time.normWord, by=IsCVC.T3.HO,k=20) +
s(Time.normWord, by=IsTACVC.T1.PO,k=20) +
s(Time.normWord, by=IsTACVC.T1.HO,k=20) +
s(Time.normWord, by=IsTACVC.T2.PO,k=20) +
s(Time.normWord, by=IsTACVC.T2.HO,k=20) +
s(Time.normWord, by=IsTACVC.T3.PO,k=20) +
s(Time.normWord, by=IsTACVC.T3.HO,k=20) +
s(Time.normWord,SpeakerTypeSensorAxis,bs='fs',m=1) +
s(Time.normWord,WordGroupSensorAxis,bs='fs',m=1), data=dat, method='fREML',
AR.start=Word.start, rho=rhoval, discrete=T, nthreads=32))
system.time(smryof <- summary(modelof))
save(modelof,smryof,file='modelALL-of-simpler-1.8.12.rda')
# model criticism
dat2 <- droplevels(dat[abs(scale(resid(modelof))) < 2.5, ],
except=colnames(dat)[sapply(dat,is.ordered)])
# fit model with ordered factor difference curve
system.time(modelof <- bam(Position.norm ~ RestPosition.norm + s(Time.normWord, by=SensorAxis,k=20) +
SensorAxis + s(Time.normWord, by=IsTA.T1.PO,k=20) + IsTA.T1.PO +
s(Time.normWord, by=IsTA.T1.HO,k=20) + IsTA.T1.HO +
s(Time.normWord, by=IsTA.T2.PO,k=20) + IsTA.T2.PO +
s(Time.normWord, by=IsTA.T2.HO,k=20) + IsTA.T2.HO +
s(Time.normWord, by=IsTA.T3.PO,k=20) + IsTA.T3.PO +
s(Time.normWord, by=IsTA.T3.HO,k=20) + IsTA.T3.HO +
s(Time.normWord, by=IsCVC.T1.PO,k=20) +
s(Time.normWord, by=IsCVC.T1.HO,k=20) +
s(Time.normWord, by=IsCVC.T2.PO,k=20) +
s(Time.normWord, by=IsCVC.T2.HO,k=20) +
s(Time.normWord, by=IsCVC.T3.PO,k=20) +
s(Time.normWord, by=IsCVC.T3.HO,k=20) +
s(Time.normWord, by=IsTACVC.T1.PO,k=20) +
s(Time.normWord, by=IsTACVC.T1.HO,k=20) +
s(Time.normWord, by=IsTACVC.T2.PO,k=20) +
s(Time.normWord, by=IsTACVC.T2.HO,k=20) +
s(Time.normWord, by=IsTACVC.T3.PO,k=20) +
s(Time.normWord, by=IsTACVC.T3.HO,k=20) +
s(Time.normWord,SpeakerTypeSensorAxis,bs='fs',m=1) +
s(Time.normWord,WordGroupSensorAxis,bs='fs',m=1), data=dat2, method='fREML',
AR.start=Word.start, rho=rhoval, discrete=T, nthreads=32))
system.time(smryof <- summary(modelof))
save(modelof,smryof,file='modelALL-of-simpler-mc-1.8.12.rda')
if (!file.exists('modelALL-group-mc-1.8.12.rda')) {
download.file('http://www.let.rug.nl/wieling/DiaArt/modelALL-group-mc-1.8.12.rda',
'modelALL-group-mc-1.8.12.rda')
}
load('modelALL-group-mc-1.8.12.rda')
if (!file.exists('modelALL-rel-group-mc-1.8.12.rda')) {
download.file('http://www.let.rug.nl/wieling/DiaArt/modelALL-rel-group-mc-1.8.12.rda',
'modelALL-rel-group-mc-1.8.12.rda')
}
load('modelALL-rel-group-mc-1.8.12.rda')
if (!file.exists('modelALL-rel-group-norho-1.8.12.rda')) {
download.file('http://www.let.rug.nl/wieling/DiaArt/modelALL-rel-group-norho-1.8.12.rda',
'modelALL-rel-group-norho-1.8.12.rda')
}
load('modelALL-rel-group-norho-1.8.12.rda')
if (!file.exists('modelALL-rel-of-simplest-mc-1.8.12.rda')) {
download.file('http://www.let.rug.nl/wieling/DiaArt/modelALL-rel-of-simplest-mc-1.8.12.rda',
'modelALL-rel-of-simplest-mc-1.8.12.rda')
}
load('modelALL-rel-of-simplest-mc-1.8.12.rda')
The following graph visualizes the individual variation.
plot(model.rel,select=25,ylab='Position (normalized)', xlab='Time (normalized)')
The following graph visualizes the autocorrelation.
par(mfrow=c(1,2))
acf_resid(modelNoRho.rel, main='Original autocorrelation in residuals',
ylab='Autocorrelation magnitude',max_lag=30)
acf_resid(model.rel, main='Corrected autocorrelation in residuals',
ylab='Autocorrelation magnitude',max_lag=30)
The following graphs visualize the tongue trajectories in two dimensions for the dialect words and the CVC sequences for the position relative to the resting position.
par(mfcol=c(3,2))
# plotArt2D does not work with ordered factors, so it needs the full model
# (which is a bit too complex)
plotArt2D(model.rel, catvar='GroupTypeSensorAxis',
catlevels.x=c('TerApel.Dialect.T1.P','Ubbergen.Dialect.T1.P'),
catlevels.y=c('TerApel.Dialect.T1.H','Ubbergen.Dialect.T1.H'),
timevar='Time.normWord', collabels=c('Ter Apel', 'Ubbergen'),
main='Rel. position of the T1 sensor: dialect words',
xlab='Posterior position (rel.)', ylab='Height (rel.)', showPoints=T,
cols=list(c("cadetblue3","cadetblue1"),c("tomato4","tomato")),alpha=15,xlim=c(-1,1),ylim=c(-1,1))
plotArt2D(model.rel, catvar='GroupTypeSensorAxis',
catlevels.x=c('TerApel.Dialect.T2.P','Ubbergen.Dialect.T2.P'),
catlevels.y=c('TerApel.Dialect.T2.H','Ubbergen.Dialect.T2.H'),
timevar='Time.normWord', collabels=c('Ter Apel', 'Ubbergen'),
main='Rel. position of the T2 sensor: dialect words',
xlab='Posterior position (rel.)', ylab='Height (rel.)', showPoints=T,
cols=list(c("cadetblue3","cadetblue1"),c("tomato4","tomato")),alpha=15,xlim=c(-1,1),ylim=c(-1,1))
plotArt2D(model.rel, catvar='GroupTypeSensorAxis',
catlevels.x=c('TerApel.Dialect.T3.P','Ubbergen.Dialect.T3.P'),
catlevels.y=c('TerApel.Dialect.T3.H','Ubbergen.Dialect.T3.H'),
timevar='Time.normWord', collabels=c('Ter Apel', 'Ubbergen'),
main='Rel. position of the T3 sensor: dialect words',
xlab='Posterior position (rel.)', ylab='Height (rel.)', showPoints=T,
cols=list(c("cadetblue3","cadetblue1"),c("tomato4","tomato")),alpha=15,xlim=c(-1,1),ylim=c(-1,1))
plotArt2D(model.rel, catvar='GroupTypeSensorAxis',
catlevels.x=c('TerApel.Standard.T1.P','Ubbergen.Standard.T1.P'),
catlevels.y=c('TerApel.Standard.T1.H','Ubbergen.Standard.T1.H'),
timevar='Time.normWord', collabels=c('Ter Apel', 'Ubbergen'),
main='Rel. position of the T1 sensor: Dutch CVC seq.',
xlab='Posterior position (rel.)', ylab='Height (rel.)', showPoints=T,
cols=list(c("cadetblue3","cadetblue1"),c("tomato4","tomato")),alpha=15,xlim=c(-1,1),ylim=c(-1,1))
plotArt2D(model.rel, catvar='GroupTypeSensorAxis',
catlevels.x=c('TerApel.Standard.T2.P','Ubbergen.Standard.T2.P'),
catlevels.y=c('TerApel.Standard.T2.H','Ubbergen.Standard.T2.H'),
timevar='Time.normWord', collabels=c('Ter Apel', 'Ubbergen'),
main='Rel. position of the T2 sensor: Dutch CVC seq.',
xlab='Posterior position (rel.)', ylab='Height (rel.)', showPoints=T,
cols=list(c("cadetblue3","cadetblue1"),c("tomato4","tomato")),alpha=15,xlim=c(-1,1),ylim=c(-1,1))
plotArt2D(model.rel, catvar='GroupTypeSensorAxis',
catlevels.x=c('TerApel.Standard.T3.P','Ubbergen.Standard.T3.P'),
catlevels.y=c('TerApel.Standard.T3.H','Ubbergen.Standard.T3.H'),
timevar='Time.normWord', collabels=c('Ter Apel', 'Ubbergen'),
main='Rel. position of the T3 sensor: Dutch CVC seq.',
xlab='Posterior position (rel.)', ylab='Height (rel.)', showPoints=T,
cols=list(c("cadetblue3","cadetblue1"),c("tomato4","tomato")),alpha=15,xlim=c(-1,1),ylim=c(-1,1))