
Martijn Wieling
Computational Linguistics Research Group
Estimate Std. Error t value Pr(>|t|)
Linear regression DistOrigin -6.418e-05 1.808e-06 -35.49 <2e-16
+ Random intercepts DistOrigin -2.224e-05 6.863e-06 -3.240 <0.001
+ Random slopes DistOrigin -1.478e-05 1.519e-05 -0.973 n.s.
This example is explained at the HLP/Jaeger lab blog
lmer( RT ~ WF + WL + SA + (1+SA|Wrd) + (1+WF|Subj) )
lmer automatically discovers the random-effects structure (nested/crossed)
lmer estimates do not suffer from shrinkage
lmer takes into account regression towards the mean (fast subjects will be slower next time, and slow subjects will be faster), thereby avoiding overfitting and improving prediction - see Efron & Morris (1977)
acf(resid(model), main=" ", ylab="autocorrelation function")
Two priming conditions:
- base: the base preceded the neologism (fluffy - fluffiness)
- heid: identity priming (fluffiness - fluffiness)
Research question: are words in the heid (identity) condition responded to faster than those in the base condition?
Analysis with lme4 (version 1.1.12):
library(lme4)
# Read the adapted primingHeid data set; header = TRUE takes column names
# from the first row (TRUE/FALSE spelled out: T/F are reassignable and unsafe)
dat <- read.table("datprevrt.txt", header = TRUE) # adapted primingHeid data set
# Baseline model: fixed effect of Condition (base vs. heid) plus crossed
# random intercepts for Word and Subject; response is log-transformed RT
dat.lmer1 <- lmer(RT.log ~ Condition + (1 | Word) + (1 | Subject), data = dat)
summary(dat.lmer1, cor = FALSE) # cor = FALSE suppresses fixed-effect correlations
# Linear mixed model fit by REML ['lmerMod']
# Formula: RT.log ~ Condition + (1 | Word) + (1 | Subject)
# Data: dat
#
# REML criterion at convergence: -102
#
# Scaled residuals:
# Min 1Q Median 3Q Max
# -2.359 -0.691 -0.134 0.590 4.261
#
# Random effects:
# Groups Name Variance Std.Dev.
# Word (Intercept) 0.00341 0.0584
# Subject (Intercept) 0.04084 0.2021
# Residual 0.04408 0.2100
# Number of obs: 832, groups: Word, 40; Subject, 26
#
# Fixed effects:
# Estimate Std. Error t value
# (Intercept) 6.6030 0.0421 156.7
# Conditionheid 0.0313 0.0147 2.1
# Add Trial as a fixed-effect control (presumably trial rank in the
# experiment -- confirm against the data set); here it is not significant
dat.lmer2 <- lmer(RT.log ~ Trial + Condition + (1 | Subject) + (1 | Word), data = dat)
summary(dat.lmer2)$coef
# Estimate Std. Error t value
# (Intercept) 6.633384 4.67e-02 142.15
# Trial -0.000146 9.62e-05 -1.52
# Conditionheid 0.030977 1.47e-02 2.11
# Control for the (log) RT on the previous trial instead: significant
# (t = 3.63), and the Condition effect weakens (t drops from 2.11 to 1.90)
dat.lmer3 <- lmer(RT.log ~ PrevRT.log + Condition + (1 | Subject) + (1 | Word), data = dat)
summary(dat.lmer3)$coef
# Estimate Std. Error t value
# (Intercept) 5.8047 0.2230 26.03
# PrevRT.log 0.1212 0.0334 3.63
# Conditionheid 0.0279 0.0146 1.90
# Also add the (log) RT to the prime; with this control the Condition
# effect disappears entirely (t = -0.383)
dat.lmer4 <- lmer(RT.log ~ RTtoPrime.log + PrevRT.log + Condition + (1 | Subject) + (1 | Word), data = dat)
summary(dat.lmer4)$coef
# Estimate Std. Error t value
# (Intercept) 4.74877 0.2953 16.080
# RTtoPrime.log 0.16379 0.0319 5.141
# PrevRT.log 0.11901 0.0330 3.605
# Conditionheid -0.00612 0.0160 -0.383
# Add whether the response to the prime was correct or incorrect
# (ResponseToPrime); an incorrect prime response slows the target RT
dat.lmer5 <- lmer(RT.log ~ RTtoPrime.log + ResponseToPrime + PrevRT.log + Condition + (1 | Subject) +
    (1 | Word), data = dat)
summary(dat.lmer5)$coef
# Estimate Std. Error t value
# (Intercept) 4.7634 0.2923 16.30
# RTtoPrime.log 0.1650 0.0315 5.24
# ResponseToPrimeincorrect 0.1004 0.0226 4.45
# PrevRT.log 0.1142 0.0327 3.49
# Conditionheid -0.0178 0.0161 -1.11
# Replace the two main effects by their interaction (`*` expands to both
# main effects plus RTtoPrime.log:ResponseToPrime); the interaction is
# significant (t = -3.34): the prime-RT effect is weaker after errors
dat.lmer6 <- lmer(RT.log ~ RTtoPrime.log * ResponseToPrime + PrevRT.log + Condition + (1 | Subject) +
    (1 | Word), data = dat)
summary(dat.lmer6)$coef
# Estimate Std. Error t value
# (Intercept) 4.3244 0.3152 13.72
# RTtoPrime.log 0.2276 0.0359 6.33
# ResponseToPrimeincorrect 1.4548 0.4052 3.59
# PrevRT.log 0.1183 0.0325 3.64
# Conditionheid -0.0266 0.0162 -1.64
# RTtoPrime.log:ResponseToPrimeincorrect -0.2025 0.0606 -3.34
# Add BaseFrequency (presumably log frequency of the base word -- confirm
# against the data set) as a lexical control predictor
dat.lmer7 <- lmer(RT.log ~ RTtoPrime.log * ResponseToPrime + PrevRT.log + BaseFrequency + Condition +
    (1 | Subject) + (1 | Word), data = dat)
# Random-effects standard deviations only
summary(dat.lmer7)$varcor
# Groups Name Std.Dev.
# Word (Intercept) 0.0339
# Subject (Intercept) 0.1549
# Residual 0.2055
# Fixed-effects table: BaseFrequency is a significant predictor (t = -2.11),
# higher base frequency -> faster responses
summary(dat.lmer7)$coef
# Estimate Std. Error t value
# (Intercept) 4.44098 0.31961 13.90
# RTtoPrime.log 0.21824 0.03615 6.04
# ResponseToPrimeincorrect 1.39705 0.40516 3.45
# PrevRT.log 0.11542 0.03246 3.56
# BaseFrequency -0.00924 0.00437 -2.11
# Conditionheid -0.02466 0.01618 -1.52
# RTtoPrime.log:ResponseToPrimeincorrect -0.19399 0.06055 -3.20
# Add an uncorrelated by-subject random slope for BaseFrequency:
# (0 + BaseFrequency | Subject) estimates the slope variance without a
# slope-intercept correlation; with the slope, the fixed BaseFrequency
# effect is no longer significant (t = -1.48)
dat.lmer7a <- lmer(RT.log ~ RTtoPrime.log * ResponseToPrime + PrevRT.log + BaseFrequency + Condition +
    (1 | Subject) + (0 + BaseFrequency | Subject) + (1 | Word), data = dat)
summary(dat.lmer7a)$coef
# Estimate Std. Error t value
# (Intercept) 4.48231 0.31736 14.12
# RTtoPrime.log 0.21812 0.03595 6.07
# ResponseToPrimeincorrect 1.41675 0.40206 3.52
# PrevRT.log 0.10849 0.03235 3.35
# BaseFrequency -0.00795 0.00535 -1.48
# Conditionheid -0.02453 0.01603 -1.53
# RTtoPrime.log:ResponseToPrimeincorrect -0.19667 0.06008 -3.27
# Positive difference: dat.lmer7a has the lower AIC, i.e. the random slope
# improves the model
AIC(dat.lmer7) - AIC(dat.lmer7a) # compare AIC
# [1] 3.65
# Alternative to AIC comparison: Likelihood Ratio Test.
# refit = FALSE keeps the REML fits (appropriate here because the models
# differ only in their random effects); spelled out instead of `F`, which
# is reassignable
anova(dat.lmer7, dat.lmer7a, refit = FALSE) # compares models
# Data: dat
# Models:
# dat.lmer7: RT.log ~ RTtoPrime.log * ResponseToPrime + PrevRT.log + BaseFrequency +
# dat.lmer7: Condition + (1 | Subject) + (1 | Word)
# dat.lmer7a: RT.log ~ RTtoPrime.log * ResponseToPrime + PrevRT.log + BaseFrequency +
# dat.lmer7a: Condition + (1 | Subject) + (0 + BaseFrequency | Subject) +
# dat.lmer7a: (1 | Word)
# Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
# dat.lmer7 10 -126 -78.3 72.8 -146
# dat.lmer7a 11 -129 -77.2 75.6 -151 5.65 1 0.017 *
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Correlated alternative: (1 + BaseFrequency | Subject) additionally
# estimates the correlation between by-subject intercepts and slopes
dat.lmer7b <- lmer(RT.log ~ RTtoPrime.log * ResponseToPrime + PrevRT.log + BaseFrequency + Condition +
    (1 + BaseFrequency | Subject) + (1 | Word), data = dat)
summary(dat.lmer7b)$varcor
# Groups Name Std.Dev. Corr
# Word (Intercept) 0.0344
# Subject (Intercept) 0.1291
# BaseFrequency 0.0136 0.41
# Residual 0.2035
# Negative difference: the simpler dat.lmer7a has the lower AIC
AIC(dat.lmer7a) - AIC(dat.lmer7b) # correlation parameter not necessary
# [1] -1.14
# Extract the LRT p-value; full column name `Pr(>Chisq)` spelled out rather
# than relying on partial matching via $P, and refit = FALSE spelled out
anova(dat.lmer7a, dat.lmer7b, refit = FALSE)$`Pr(>Chisq)`[2] # p-value from anova
# [1] 0.353
# Model criticism: Q-Q plot of the residuals to check normality
qqnorm(resid(dat.lmer7a))
qqline(resid(dat.lmer7a))
# Residuals vs. fitted values to check for heteroscedasticity
plot(fitted(dat.lmer7a), resid(dat.lmer7a))
# Trim the data: keep only observations whose absolute standardized
# residual is below 2.5
dat2 <- dat[abs(scale(resid(dat.lmer7a))) < 2.5, ]
# Refit the same model on the trimmed data; Condition is now significant
# (t = -2.66): heid primes yield faster responses
dat2.lmer7a <- lmer(RT.log ~ RTtoPrime.log * ResponseToPrime + PrevRT.log + BaseFrequency + Condition +
    (1 | Subject) + (0 + BaseFrequency | Subject) + (1 | Word), data = dat2)
summary(dat2.lmer7a)$coef
# Estimate Std. Error t value
# (Intercept) 4.44735 0.28598 15.55
# RTtoPrime.log 0.23511 0.03200 7.35
# ResponseToPrimeincorrect 1.56001 0.35551 4.39
# PrevRT.log 0.09554 0.02926 3.27
# BaseFrequency -0.00815 0.00459 -1.78
# Conditionheid -0.03814 0.01435 -2.66
# RTtoPrime.log:ResponseToPrimeincorrect -0.21616 0.05316 -4.07
Conclusion: words in the heid (identity-priming) condition are responded to faster than those in the base condition.
# Count trials flagged as outliers (|standardized residual| >= 2.5); the
# stray prose word "condition" fused onto this line by the slide
# extraction has been removed so the call parses
(noutliers <- sum(abs(scale(resid(dat.lmer7a))) >= 2.5))
# [1] 17
# Proportion of the data removed by trimming (about 2%)
noutliers/nrow(dat)
# [1] 0.0204
# Squared correlation of observed and fitted values as an R^2-like measure
# NOTE(review): fitted values are on the log scale while dat$RT appears to
# be raw RT -- confirm whether dat$RT.log was intended here
cor(dat$RT, fitted(dat.lmer7a))^2
# [1] 0.521
# The trimmed model accounts for more variance
cor(dat2$RT, fitted(dat2.lmer7a))^2
# [1] 0.572
library(car)
# Show the Q-Q plot (car::qqp, with confidence envelope) and the
# residuals-vs-fitted plot side by side
par(mfrow = c(1, 2))
qqp(resid(dat2.lmer7a))
plot(fitted(dat2.lmer7a), resid(dat2.lmer7a))
library(boot)
# Parametric-bootstrap 95% confidence intervals for all parameters
# (1000 simulations); .sig01-.sig03 are the random-effect standard
# deviations, .sigma is the residual SD. CIs excluding 0 (e.g.
# Conditionheid) indicate reliable effects
(bs.lmer7a <- confint(dat2.lmer7a, method = "boot", nsim = 1000, level = 0.95))
# 2.5 % 97.5 %
# .sig01 0.00e+00 0.040543
# .sig02 1.41e-05 0.021509
# .sig03 9.75e-02 0.190691
# .sigma 1.71e-01 0.189190
# (Intercept) 3.83e+00 4.994796
# RTtoPrime.log 1.74e-01 0.302521
# ResponseToPrimeincorrect 8.53e-01 2.255276
# PrevRT.log 3.66e-02 0.157534
# BaseFrequency -1.76e-02 0.000579
# Conditionheid -6.73e-02 -0.010342
# RTtoPrime.log:ResponseToPrimeincorrect -3.19e-01 -0.112836
We have used lmer to conduct mixed-effects regression.
We have seen how to specify random intercepts and slopes with lmer, and why these are essential when you have multiple responses per subject or item.
Thank you for your attention!