
Martijn Wieling
University of Groningen
Model | Term | Estimate | Std. Error | t value | Pr(>|t|)
---|---|---|---|---|---
Linear regression | DistOrigin | -6.418e-05 | 1.808e-06 | -35.49 | <2e-16
+ Random intercepts | DistOrigin | -2.224e-05 | 6.863e-06 | -3.240 | <0.001
+ Random slopes | DistOrigin | -1.478e-05 | 1.519e-05 | -0.973 | n.s.
(This example is explained at the HLP/Jaeger lab blog)
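A minimal sketch of the three model specifications behind this table (DistOrigin is from the original example; y, G and dat are placeholder names for the response, a grouping factor and the data):
library(lme4)
m.lm  <- lm(y ~ DistOrigin, data = dat)                           # linear regression
m.int <- lmer(y ~ DistOrigin + (1 | G), data = dat)               # + random intercepts
m.slp <- lmer(y ~ DistOrigin + (1 + DistOrigin | G), data = dat)  # + random slopes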
lmer( RT ~ WF + WL + SA + (1+SA|Wrd) + (1+WF|Subj) )
(lmer automatically discovers the random-effects structure: nested or crossed)

Subject | Intercept | WF slope |
---|---|---|
S1 | 525 | -2 |
S2 | 400 | -1 |
S3 | 500 | -2 |
S4 | 550 | -3 |
S5 | 450 | -2 |
S6 | 600 | -4 |
S7 | 300 | 0 |
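The per-subject values in such a table can be extracted from the fitted model; a minimal sketch, assuming the model above is stored in m and its data in dat (the lmList per-subject fits are added only for comparison and are not part of the original code):
coef(m)$Subj[, c("(Intercept)", "WF")]           # per-subject intercepts and WF slopes from lmer
coef(lme4::lmList(RT ~ WF | Subj, data = dat))   # separate per-subject regressions, for comparison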
- In contrast to separate per-subject regressions, which do not suffer from shrinkage, the per-subject estimates of lmer are shrunk towards the population mean
- lmer takes into account regression towards the mean (fast subjects will be slower next time, and slow subjects will be faster), thereby avoiding overfitting and improving prediction; see Efron & Morris (1977)

load("lls.rda")
head(lls)
# SID BAC Sex BirthYear L2cnt SelfEN LivedEN L2anxiety Edu LID Lang Rating
# 1 S0045188-17 0.392 F -5.66 -1.04 0.463 Y 0.357 0.817 L0637009 NL 0.946
# 2 S0045188-17 0.392 F -5.66 -1.04 0.463 Y 0.357 0.817 L196 EN 0.330
# 3 S0045188-17 0.392 F -5.66 -1.04 0.463 Y 0.357 0.817 L86 EN -0.298
# 4 S0045188-17 0.392 F -5.66 -1.04 0.463 Y 0.357 0.817 L0614758 NL 0.325
# 5 S0045188-17 0.392 F -5.66 -1.04 0.463 Y 0.357 0.817 L220 EN -0.435
# 6 S0045188-17 0.392 F -5.66 -1.04 0.463 Y 0.357 0.817 L225 EN -0.239
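Each subject (SID) is thus associated with many ratings, from different listeners (LID) and in both languages (Lang); a small sketch of how this structure might be inspected (not part of the original code):
length(unique(lls$SID))     # number of subjects
length(unique(lls$LID))     # number of listeners
range(table(lls$SID))       # ratings per subject
xtabs(~ Lang, data = lls)   # ratings per language (EN vs. NL)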
(output obtained with lme4 version 1.1-31)

library(lme4)
m <- lmer(Rating ~ BAC + (1 | SID) + (1 | LID), data = lls) # singular fit (see message below)
# boundary (singular) fit: see help('isSingular')
summary(m)$coef # show fixed effects
# Estimate Std. Error t value
# (Intercept) 0.144 0.0638 2.252
# BAC -0.221 0.3262 -0.677
summary(m)$varcor # show random-effects part only: no variability per listener (due to z-scaling)
# Groups Name Std.Dev.
# LID (Intercept) 0.000
# SID (Intercept) 0.560
# Residual 0.738
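The zero variance for LID follows from the preprocessing: if ratings are z-scaled within each listener, between-listener differences are removed. The exact preprocessing is not shown here; a hypothetical sketch of such per-listener z-scaling (RatingRaw is a placeholder name):
lls$Rating <- ave(lls$RatingRaw, lls$LID, FUN = function(x) as.numeric(scale(x)))  # hypothetical: z-scale within listener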
m1 <- lmer(Rating ~ Lang * BAC + (1 | SID), data = lls)
summary(m1, cor = F) # suppress expected correlation of regression coefficients; Note: |t| > 2 => p < .05 (N > 100)
# Linear mixed model fit by REML ['lmerMod']
# Formula: Rating ~ Lang * BAC + (1 | SID)
# Data: lls
#
# REML criterion at convergence: 5853
#
# Scaled residuals:
# Min 1Q Median 3Q Max
# -3.668 -0.647 0.032 0.707 3.187
#
# Random effects:
# Groups Name Variance Std.Dev.
# SID (Intercept) 0.305 0.552
# Residual 0.533 0.730
# Number of obs: 2541, groups: SID, 82
#
# Fixed effects:
# Estimate Std. Error t value
# (Intercept) 0.0883 0.0635 1.39
# LangNL 0.2430 0.0358 6.78
# BAC -0.0592 0.3255 -0.18
# LangNL:BAC -0.7286 0.1938 -3.76
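lmer itself reports no p-values; the |t| > 2 rule of thumb uses the normal approximation for large samples. A minimal sketch of how approximate p-values could be derived from these t-values (not part of the original code):
tvals <- summary(m1)$coef[, "t value"]
round(2 * (1 - pnorm(abs(tvals))), 4)   # two-sided normal-approximation p-values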
library(visreg)
visreg(m1, "BAC", by = "Lang", overlay = T, xlab = "BAC (centered)", ylab = "Rating")
m0 <- lmer(Rating ~ Lang + (1 | SID), data = lls) # comparison: best simpler model (BAC n.s.)
anova(m0, m1) # interaction necessary
# Data: lls
# Models:
# m0: Rating ~ Lang + (1 | SID)
# m1: Rating ~ Lang * BAC + (1 | SID)
# npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
# m0 4 5866 5889 -2929 5858
# m1 6 5855 5890 -2921 5843 14.7 2 0.00063 ***
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m2 <- lmer(Rating ~ Lang * BAC + (1 + Lang | SID), data = lls)
anova(m1, m2, refit = FALSE) # random slope necessary? (REML model comparison)
# Data: lls
# Models:
# m1: Rating ~ Lang * BAC + (1 | SID)
# m2: Rating ~ Lang * BAC + (1 + Lang | SID)
# npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
# m1 6 5865 5900 -2927 5853
# m2 8 5626 5672 -2805 5610 244 2 <2e-16 ***
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
- REML (the default lmer fitting method) is appropriate when comparing two models differing exclusively in their random-effects structure
- anova automatically refits the models with ML, unless refit=FALSE is specified

summary(m2, cor = F)$coef # show only fixed effects of model with random slope
# Estimate Std. Error t value
# (Intercept) 0.0988 0.0747 1.322
# LangNL 0.2544 0.0815 3.121
# BAC 0.0994 0.3925 0.253
# LangNL:BAC -0.9875 0.4265 -2.316
summary(m1, cor = F)$coef # results of model without random slope
# Estimate Std. Error t value
# (Intercept) 0.0883 0.0635 1.391
# LangNL 0.2430 0.0358 6.785
# BAC -0.0592 0.3255 -0.182
# LangNL:BAC -0.7286 0.1938 -3.759
summary(m2, cor = F)$varcor # show only random effects of model with random slope
# Groups Name Std.Dev. Corr
# SID (Intercept) 0.653
# LangNL 0.653 -0.83
# Residual 0.683
summary(m1, cor = F)$varcor # results of model without random slope
# Groups Name Std.Dev.
# SID (Intercept) 0.552
# Residual 0.730
- With (1+X|RF), random intercepts and random slopes (separated by |) are assumed to be correlated and a correlation parameter is calculated for each pair
- To exclude these correlation parameters, specify (1|RF) + (0+X|RF) (alternatively: (1+X||RF))
- Model comparison (anova with refit=FALSE) is used to determine the best model (see the sketch below)
- Factorial predictors (such as Lang) should always be grouped with the intercept: (1+Lang|SID), but not: (1|SID) + (0 + Lang|SID)
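A generic sketch of such a comparison for a numerical predictor (y, X, RF and dat are placeholder names, not from the original analysis):
m_corr   <- lmer(y ~ X + (1 + X | RF), data = dat)    # intercept and slope correlated
m_nocorr <- lmer(y ~ X + (1 + X || RF), data = dat)   # without the correlation parameter
anova(m_nocorr, m_corr, refit = FALSE)                 # REML comparison of random-effects structures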
plot(coef(m2)$SID[, "(Intercept)"], coef(m2)$SID[, "LangNL"], xlab = "By-subject random intercept",
ylab = "By-subject random slope for language contrast")
visreg(m2, "BAC", by = "Lang", overlay = T, xlab = "BAC (centered)", ylab = "Rating")
m2b <- lmer(Rating ~ Lang:BAC + Lang + (1 + Lang | SID), data = lls) # instead of Lang * BAC
summary(m2b)$coef # effect of BAC per language separately
# Estimate Std. Error t value
# (Intercept) 0.0988 0.0747 1.322
# LangNL 0.2544 0.0815 3.121
# LangEN:BAC 0.0994 0.3925 0.253
# LangNL:BAC -0.8882 0.2684 -3.309
summary(m2)$coef # original model: effect of BAC for NL vs EN
# Estimate Std. Error t value
# (Intercept) 0.0988 0.0747 1.322
# LangNL 0.2544 0.0815 3.121
# BAC 0.0994 0.3925 0.253
# LangNL:BAC -0.9875 0.4265 -2.316
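The two parametrizations are equivalent: the BAC effect for NL in m2b equals the EN effect plus the NL difference in m2 (0.0994 + -0.9875 ≈ -0.888). A small sketch of this check (not part of the original code):
fixef(m2)["BAC"] + fixef(m2)["LangNL:BAC"]   # should (approximately) equal fixef(m2b)["LangNL:BAC"]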
m3 <- lmer(Rating ~ Lang * BAC + Sex + (1 + Lang | SID), data = lls)
summary(m3)$coef
# Estimate Std. Error t value
# (Intercept) 0.1347 0.0870 1.549
# LangNL 0.2532 0.0815 3.107
# BAC 0.1325 0.3987 0.332
# SexM -0.0815 0.0973 -0.838
# LangNL:BAC -0.9801 0.4265 -2.298
anova(m2, m3)$P[2] # p-value from anova: Sex not necessary (also not in interaction; not shown)
# [1] 0.404
m3 <- lmer(Rating ~ Lang * BAC + BirthYear + (1 + Lang | SID), data = lls)
summary(m3)$coef
# Estimate Std. Error t value
# (Intercept) 0.09969 0.07472 1.334
# LangNL 0.25462 0.08171 3.116
# BAC 0.08313 0.39269 0.212
# BirthYear 0.00603 0.00495 1.217
# LangNL:BAC -0.99645 0.42764 -2.330
anova(m2, m3)$P[2] # BirthYear not necessary (also not in interaction with language; not shown)
# [1] 0.217
m3 <- lmer(Rating ~ Lang * BAC + Edu + (1 + Lang | SID), data = lls)
summary(m3)$coef
# Estimate Std. Error t value
# (Intercept) 0.102 0.0709 1.443
# LangNL 0.255 0.0817 3.115
# BAC 0.200 0.3734 0.536
# Edu 0.112 0.0341 3.288
# LangNL:BAC -1.034 0.4266 -2.424
anova(m2, m3)$P[2] # Education necessary (but not in interaction with language; not shown)
# [1] 0.00139
m4 <- lmer(Rating ~ Lang * BAC + Edu + LivedEN + (1 + Lang | SID), data = lls)
summary(m4)$coef
# Estimate Std. Error t value
# (Intercept) 0.0831 0.0732 1.134
# LangNL 0.2526 0.0817 3.091
# BAC 0.2027 0.3708 0.546
# Edu 0.1126 0.0342 3.290
# LivedENY 0.1116 0.1147 0.973
# LangNL:BAC -1.0368 0.4264 -2.432
anova(m3, m4)$P[2] # LivedEN not necessary (also not in interaction with language; not shown)
# [1] 0.327
m4 <- lmer(Rating ~ Lang * BAC + Edu + SelfEN + (1 + Lang | SID), data = lls)
summary(m4)$coef
# Estimate Std. Error t value
# (Intercept) 0.1077 0.0644 1.671
# LangNL 0.2491 0.0824 3.021
# BAC 0.1675 0.3401 0.493
# Edu 0.0758 0.0343 2.211
# SelfEN 0.1299 0.0370 3.507
# LangNL:BAC -1.0967 0.4283 -2.561
anova(m3, m4)$P[2] # SelfEN necessary
# [1] 0.00135
m5 <- lmer(Rating ~ Lang * BAC + Edu + SelfEN * Lang + (1 + Lang | SID), data = lls)
summary(m5)$coef
# Estimate Std. Error t value
# (Intercept) 0.1087 0.0612 1.775
# LangNL 0.2474 0.0753 3.285
# BAC 0.0665 0.3236 0.206
# Edu 0.0741 0.0344 2.157
# SelfEN 0.2874 0.0541 5.309
# LangNL:BAC -0.9385 0.3933 -2.386
# LangNL:SelfEN -0.2519 0.0630 -3.996
anova(m4, m5)$P[2] # interaction necessary
# [1] 0.000103
visreg(m5, "SelfEN", by = "Lang", overlay = T, xlab = "EN self rating (centered)", ylab = "Rating")
m6 <- lmer(Rating ~ Lang * BAC + Edu + SelfEN * Lang + L2cnt + (1 + Lang | SID), data = lls)
summary(m6)$coef
# Estimate Std. Error t value
# (Intercept) 0.1080 0.0599 1.802
# LangNL 0.2500 0.0755 3.314
# BAC 0.1092 0.3172 0.344
# Edu 0.0635 0.0339 1.875
# SelfEN 0.2787 0.0531 5.246
# L2cnt 0.1012 0.0467 2.168
# LangNL:BAC -0.9356 0.3937 -2.377
# LangNL:SelfEN -0.2499 0.0632 -3.955
anova(m5, m6)$P[2] # L2cnt necessary
# [1] 0.028
Edu now no longer has an absolute \(t\)-value > 2, but we retain it as it is close to significance (\(p \approx 0.06\) based on anova)
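A sketch of how that anova comparison could be carried out (m6_noEdu is a hypothetical name; this step is not shown in the original code):
m6_noEdu <- update(m6, . ~ . - Edu)   # m6 without Edu
anova(m6_noEdu, m6)                   # ML comparison for the fixed effect of Edu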
m7 <- lmer(Rating ~ Lang * BAC + Edu + SelfEN * Lang + L2cnt * Lang + (1 + Lang | SID),
data = lls)
summary(m7)$coef
# Estimate Std. Error t value
# (Intercept) 0.1076 0.0601 1.792
# LangNL 0.2501 0.0758 3.300
# BAC 0.1237 0.3183 0.389
# Edu 0.0630 0.0338 1.863
# SelfEN 0.2746 0.0536 5.127
# L2cnt 0.1340 0.0677 1.979
# LangNL:BAC -0.9640 0.3972 -2.427
# LangNL:SelfEN -0.2432 0.0642 -3.785
# LangNL:L2cnt -0.0576 0.0861 -0.668
anova(m6, m7)$P[2] # no interaction necessary: L2cnt affects both languages similarly
# [1] 0.49
m7 <- lmer(Rating ~ Lang * BAC + Edu + SelfEN * Lang + L2cnt + L2anxiety + (1 + Lang |
SID), data = lls)
summary(m7)$coef
# Estimate Std. Error t value
# (Intercept) 0.1098 0.0580 1.892
# LangNL 0.2453 0.0755 3.251
# BAC 0.1042 0.3072 0.339
# Edu 0.0606 0.0327 1.854
# SelfEN 0.2026 0.0599 3.384
# L2cnt 0.1202 0.0456 2.638
# L2anxiety -0.2277 0.0918 -2.481
# LangNL:BAC -0.9463 0.3931 -2.407
# LangNL:SelfEN -0.2526 0.0632 -3.998
anova(m6, m7)$P[2] # L2anxiety necessary
# [1] 0.0118
m8 <- lmer(Rating ~ Lang * BAC + Edu + SelfEN * Lang + L2cnt + L2anxiety * Lang + (1 +
Lang | SID), data = lls)
summary(m8)$coef
# Estimate Std. Error t value
# (Intercept) 0.1103 0.0581 1.900
# LangNL 0.2462 0.0756 3.259
# BAC 0.0949 0.3075 0.309
# Edu 0.0607 0.0327 1.857
# SelfEN 0.1721 0.0680 2.531
# L2cnt 0.1206 0.0456 2.644
# L2anxiety -0.3202 0.1340 -2.389
# LangNL:BAC -0.9293 0.3940 -2.359
# LangNL:SelfEN -0.1973 0.0861 -2.292
# LangNL:L2anxiety 0.1653 0.1747 0.946
anova(m7, m8)$P[2] # no interaction necessary: L2anxiety affects both languages similarly
# [1] 0.337
m7 is the best model!

library(car)
vif(m7) # should be lower than 5 (for centered numerical variables): OK
# Lang BAC Edu SelfEN L2cnt L2anxiety Lang:BAC
# 1.00 2.32 1.19 3.40 1.08 1.94 2.30
# Lang:SelfEN
# 2.34
acf(resid(m7)) # no autocorrelation
qqnorm(resid(m7)) # OK
qqline(resid(m7))
plot(fitted(m7), resid(m7)) # some heteroscedasticity
lls2 <- lls[abs(scale(resid(m7))) < 2.5, ] # trimmed data: |standardized residual| < 2.5 (98.5% of original data)
m7.2 <- lmer(Rating ~ Lang * BAC + Edu + SelfEN * Lang + L2cnt + L2anxiety + (1 + Lang |
SID), data = lls2)
summary(m7.2)$coef
# Estimate Std. Error t value
# (Intercept) 0.1161 0.0594 1.955
# LangNL 0.2493 0.0758 3.289
# BAC 0.1342 0.3145 0.427
# Edu 0.0627 0.0332 1.887
# SelfEN 0.2055 0.0612 3.360
# L2cnt 0.1245 0.0463 2.687
# L2anxiety -0.2451 0.0933 -2.626
# LangNL:BAC -0.9850 0.3948 -2.495
# LangNL:SelfEN -0.2708 0.0636 -4.260
par(mfrow = c(1, 2))
qqnorm(resid(m7.2)) # reasonably OK
qqline(resid(m7.2))
plot(fitted(m7.2), resid(m7.2)) # better
library(boot)
(bsm7.2 <- confint(m7.2, method = "boot", nsim = 1000, level = 0.95))
# 2.5 % 97.5 %
# .sig01 0.26616 0.3904
# .sig02 0.05719 0.5966
# .sig03 0.23968 0.3649
# .sigma 0.62334 0.6590
# (Intercept) 0.16228 0.3131
# Lang1 -0.19098 -0.0537
# BAC -0.76876 0.0527
# Edu -0.00264 0.1271
# SelfEN -0.01707 0.1694
# L2cnt 0.03094 0.2144
# L2anxiety -0.42732 -0.0596
# Lang1:BAC 0.09767 0.8765
# Lang1:SelfEN 0.07578 0.1996
- A confidence interval that does not contain 0 indicates that the corresponding parameter is a significant predictor; note that the interval for Edu here only marginally includes 0
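A small sketch of how one might list the fixed effects whose bootstrapped intervals exclude zero (not part of the original code; it uses the bsm7.2 object from above):
fixed <- bsm7.2[!grepl("^\\.sig", rownames(bsm7.2)), ]   # drop the variance components
fixed[sign(fixed[, 1]) == sign(fixed[, 2]), ]            # intervals excluding zero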
- We have seen how to use lmer to conduct mixed-effects regression
- We have covered random intercepts and random slopes in lmer and why these are essential when you have multiple responses per subject or item

Thank you for your attention!