Martijn Wieling
Computational Linguistics Research Group
                                    Estimate  Std. Error  t value  Pr(>|t|)    
Linear regression     DistOrigin  -6.418e-05   1.808e-06   -35.49    <2e-16
+ Random intercepts   DistOrigin  -2.224e-05   6.863e-06   -3.240    <0.001
+ Random slopes       DistOrigin  -1.478e-05   1.519e-05   -0.973    n.s.
This example is explained at the HLP/Jaeger lab blog
lmer( RT ~ WF + WL + SA + (1+SA|Wrd) + (1+WF|Subj) )
lmer automatically discovers random-effects structure (nested/crossed)
[...] lmer do not suffer from shrinkage
lmer takes into account regression towards the mean (fast subjects will be slower next time, and slow subjects will be faster), thereby avoiding overfitting and improving prediction - see Efron & Morris (1977)
# Plot the autocorrelation function of the residuals of `model`, with a
# single-space title and a custom y-axis label.
# NOTE(review): `model` is not defined in the visible part of this file.
acf(x = resid(model), ylab = "autocorrelation function", main = " ")
# Slide text that was fused onto this line ("[...]" marks text lost in
# extraction):
# [...] baseheid): base preceded neologism (fluffy - fluffiness)
# [...] heid): identity priming (fluffiness - fluffiness)
# [...] heid) faster than those in base condition (baseheid)
# [...] lme4 version 1.1.12)

# Load lme4, which provides the lmer() calls used in the rest of this file.
library(lme4)
# Read the adapted primingHeid data set; the first line of the file holds the
# column names.
dat <- read.table("datprevrt.txt", header = TRUE)

# Baseline model: RT.log as a function of Condition, with random intercepts
# for Word and Subject.
dat.lmer1 <- lmer(RT.log ~ Condition + (1 | Word) + (1 | Subject), data = dat)

# Show the model summary without the correlation matrix of the fixed effects.
# The argument is spelled out in full instead of relying on partial matching
# of `cor`, and TRUE/FALSE replace the reassignable T/F.
summary(dat.lmer1, correlation = FALSE)
# Linear mixed model fit by REML ['lmerMod']
# Formula: RT.log ~ Condition + (1 | Word) + (1 | Subject)
#    Data: dat
# 
# REML criterion at convergence: -102
# 
# Scaled residuals: 
#    Min     1Q Median     3Q    Max 
# -2.359 -0.691 -0.134  0.590  4.261 
# 
# Random effects:
#  Groups   Name        Variance Std.Dev.
#  Word     (Intercept) 0.00341  0.0584  
#  Subject  (Intercept) 0.04084  0.2021  
#  Residual             0.04408  0.2100  
# Number of obs: 832, groups:  Word, 40; Subject, 26
# 
# Fixed effects:
#               Estimate Std. Error t value
# (Intercept)     6.6030     0.0421   156.7
# Conditionheid   0.0313     0.0147     2.1
  # Add Trial as a fixed-effect predictor to the random-intercepts model.
  dat.lmer2 <- lmer(
    RT.log ~ Trial + Condition + (1 | Subject) + (1 | Word),
    data = dat
  )
  # Fixed-effects coefficient table. The component is named in full because
  # `$coef` only works through partial matching of `coefficients`.
  summary(dat.lmer2)$coefficients
#                Estimate Std. Error t value
# (Intercept)    6.633384   4.67e-02  142.15
# Trial         -0.000146   9.62e-05   -1.52
# Conditionheid  0.030977   1.47e-02    2.11
  # Use PrevRT.log (instead of Trial) as the control predictor.
  dat.lmer3 <- lmer(
    RT.log ~ PrevRT.log + Condition + (1 | Subject) + (1 | Word),
    data = dat
  )
  # Fixed-effects coefficient table. The component is named in full because
  # `$coef` only works through partial matching of `coefficients`.
  summary(dat.lmer3)$coefficients
#               Estimate Std. Error t value
# (Intercept)     5.8047     0.2230   26.03
# PrevRT.log      0.1212     0.0334    3.63
# Conditionheid   0.0279     0.0146    1.90
  # Add RTtoPrime.log as a further fixed-effect predictor.
  dat.lmer4 <- lmer(
    RT.log ~ RTtoPrime.log + PrevRT.log + Condition +
      (1 | Subject) + (1 | Word),
    data = dat
  )
  # Fixed-effects coefficient table. The component is named in full because
  # `$coef` only works through partial matching of `coefficients`.
  summary(dat.lmer4)$coefficients
#               Estimate Std. Error t value
# (Intercept)    4.74877     0.2953  16.080
# RTtoPrime.log  0.16379     0.0319   5.141
# PrevRT.log     0.11901     0.0330   3.605
# Conditionheid -0.00612     0.0160  -0.383
  # Add ResponseToPrime as a further fixed-effect predictor.
  dat.lmer5 <- lmer(
    RT.log ~ RTtoPrime.log + ResponseToPrime + PrevRT.log + Condition +
      (1 | Subject) + (1 | Word),
    data = dat
  )
  # Fixed-effects coefficient table. The component is named in full because
  # `$coef` only works through partial matching of `coefficients`.
  summary(dat.lmer5)$coefficients
#                          Estimate Std. Error t value
# (Intercept)                4.7634     0.2923   16.30
# RTtoPrime.log              0.1650     0.0315    5.24
# ResponseToPrimeincorrect   0.1004     0.0226    4.45
# PrevRT.log                 0.1142     0.0327    3.49
# Conditionheid             -0.0178     0.0161   -1.11
  # Let the effect of RTtoPrime.log differ by ResponseToPrime: `*` adds both
  # main effects and their interaction.
  dat.lmer6 <- lmer(
    RT.log ~ RTtoPrime.log * ResponseToPrime + PrevRT.log + Condition +
      (1 | Subject) + (1 | Word),
    data = dat
  )
  # Fixed-effects coefficient table. The component is named in full because
  # `$coef` only works through partial matching of `coefficients`.
  summary(dat.lmer6)$coefficients
#                                        Estimate Std. Error t value
# (Intercept)                              4.3244     0.3152   13.72
# RTtoPrime.log                            0.2276     0.0359    6.33
# ResponseToPrimeincorrect                 1.4548     0.4052    3.59
# PrevRT.log                               0.1183     0.0325    3.64
# Conditionheid                           -0.0266     0.0162   -1.64
# RTtoPrime.log:ResponseToPrimeincorrect  -0.2025     0.0606   -3.34
# Add BaseFrequency as a further fixed-effect predictor.
dat.lmer7 <- lmer(
  RT.log ~ RTtoPrime.log * ResponseToPrime + PrevRT.log + BaseFrequency +
    Condition + (1 | Subject) + (1 | Word),
  data = dat
)
# Standard deviations of the random effects and of the residual.
summary(dat.lmer7)$varcor
#  Groups   Name        Std.Dev.
#  Word     (Intercept) 0.0339  
#  Subject  (Intercept) 0.1549  
#  Residual             0.2055
# Fixed-effects coefficient table. The component is named in full because
# `$coef` only works through partial matching of `coefficients`.
summary(dat.lmer7)$coefficients
#                                        Estimate Std. Error t value
# (Intercept)                             4.44098    0.31961   13.90
# RTtoPrime.log                           0.21824    0.03615    6.04
# ResponseToPrimeincorrect                1.39705    0.40516    3.45
# PrevRT.log                              0.11542    0.03246    3.56
# BaseFrequency                          -0.00924    0.00437   -2.11
# Conditionheid                          -0.02466    0.01618   -1.52
# RTtoPrime.log:ResponseToPrimeincorrect -0.19399    0.06055   -3.20
  # Add a by-subject random slope for BaseFrequency. It is specified as a
  # separate term from the by-subject random intercept, so no
  # intercept-slope correlation is estimated.
  dat.lmer7a <- lmer(
    RT.log ~ RTtoPrime.log * ResponseToPrime + PrevRT.log + BaseFrequency +
      Condition + (1 | Subject) + (0 + BaseFrequency | Subject) + (1 | Word),
    data = dat
  )
  # Fixed-effects coefficient table. The component is named in full because
  # `$coef` only works through partial matching of `coefficients`.
  summary(dat.lmer7a)$coefficients
#                                        Estimate Std. Error t value
# (Intercept)                             4.48231    0.31736   14.12
# RTtoPrime.log                           0.21812    0.03595    6.07
# ResponseToPrimeincorrect                1.41675    0.40206    3.52
# PrevRT.log                              0.10849    0.03235    3.35
# BaseFrequency                          -0.00795    0.00535   -1.48
# Conditionheid                          -0.02453    0.01603   -1.53
# RTtoPrime.log:ResponseToPrimeincorrect -0.19667    0.06008   -3.27
  # AIC difference between the model without and the model with the
  # by-subject random slope for BaseFrequency.
  AIC(dat.lmer7) - AIC(dat.lmer7a)  # compare AIC
  # [1] 3.65
  # Alternative to AIC comparison: Likelihood Ratio Test
  # refit = FALSE compares the fits as they are instead of refitting them with
  # ML first; FALSE is written out because F can be reassigned.
  anova(dat.lmer7, dat.lmer7a, refit = FALSE)  # compares models
# Data: dat
# Models:
# dat.lmer7: RT.log ~ RTtoPrime.log * ResponseToPrime + PrevRT.log + BaseFrequency + 
# dat.lmer7:     Condition + (1 | Subject) + (1 | Word)
# dat.lmer7a: RT.log ~ RTtoPrime.log * ResponseToPrime + PrevRT.log + BaseFrequency + 
# dat.lmer7a:     Condition + (1 | Subject) + (0 + BaseFrequency | Subject) + 
# dat.lmer7a:     (1 | Word)
#            Df  AIC   BIC logLik deviance Chisq Chi Df Pr(>Chisq)  
# dat.lmer7  10 -126 -78.3   72.8     -146                          
# dat.lmer7a 11 -129 -77.2   75.6     -151  5.65      1      0.017 *
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  # Variant of dat.lmer7a in which the by-subject random intercept and the
  # by-subject BaseFrequency slope share one term, so their correlation is
  # estimated as well.
  dat.lmer7b <- lmer(
    formula = RT.log ~ RTtoPrime.log * ResponseToPrime + PrevRT.log +
      BaseFrequency + Condition + (1 + BaseFrequency | Subject) + (1 | Word),
    data = dat
  )
  # Random-effects standard deviations and correlation.
  summary(dat.lmer7b)[["varcor"]]
#  Groups   Name          Std.Dev. Corr
#  Word     (Intercept)   0.0344       
#  Subject  (Intercept)   0.1291       
#           BaseFrequency 0.0136   0.41
#  Residual               0.2035
# AIC difference between the model without and the model with the
# intercept-slope correlation parameter.
AIC(dat.lmer7a) - AIC(dat.lmer7b)  # correlation parameter not necessary
# [1] -1.14
# Likelihood ratio test p-value for the second model. The column is selected
# by its full name instead of the partially matched `$P`, and FALSE replaces
# the reassignable F.
anova(dat.lmer7a, dat.lmer7b, refit = FALSE)[2, "Pr(>Chisq)"]  # p-value from anova
# [1] 0.353
  # Model criticism for dat.lmer7a: normal Q-Q plot of the residuals with a
  # reference line, then residuals against fitted values.
  qqnorm(resid(dat.lmer7a))
  qqline(resid(dat.lmer7a))
  plot(fitted(dat.lmer7a), resid(dat.lmer7a))

  # Keep only the observations whose standardized residual is below 2.5 in
  # absolute value.
  # NOTE(review): this indexes rows of dat by position, so it assumes the
  # model used every row of dat - confirm no rows were dropped for NAs.
  dat2 <- dat[abs(scale(resid(dat.lmer7a))) < 2.5, ]

  # Refit the same model specification on the trimmed data.
  dat2.lmer7a <- lmer(
    RT.log ~ RTtoPrime.log * ResponseToPrime + PrevRT.log + BaseFrequency +
      Condition + (1 | Subject) + (0 + BaseFrequency | Subject) + (1 | Word),
    data = dat2
  )
  # Fixed-effects coefficient table. The component is named in full because
  # `$coef` only works through partial matching of `coefficients`.
  summary(dat2.lmer7a)$coefficients
#                                        Estimate Std. Error t value
# (Intercept)                             4.44735    0.28598   15.55
# RTtoPrime.log                           0.23511    0.03200    7.35
# ResponseToPrimeincorrect                1.56001    0.35551    4.39
# PrevRT.log                              0.09554    0.02926    3.27
# BaseFrequency                          -0.00815    0.00459   -1.78
# Conditionheid                          -0.03814    0.01435   -2.66
# RTtoPrime.log:ResponseToPrimeincorrect -0.21616    0.05316   -4.07
# Slide text that was fused onto the next statement ("[...]" marks text lost
# in extraction):
# [...] heid condition are responded faster to than those in the baseheid condition

# Count the observations whose standardized residual in dat.lmer7a is at
# least 2.5 in absolute value; the outer parentheses print the result.
(noutliers <- sum(abs(scale(resid(dat.lmer7a))) >= 2.5))
# [1] 17
# Proportion of all observations that this count represents.
noutliers / nrow(dat)
# [1] 0.0204
# Squared correlation between dat$RT and the fitted values of the full-data
# model.
# NOTE(review): the models are fitted to RT.log but this uses the RT column -
# confirm that RT is the intended variable.
cor(dat$RT, fitted(dat.lmer7a))^2
# [1] 0.521
# Same measure for the model refitted on the trimmed data (dat2).
cor(dat2$RT, fitted(dat2.lmer7a))^2
# [1] 0.572
  # Diagnostics for the trimmed-data model, drawn in a 1 x 2 panel layout:
  # Q-Q plot of the residuals (qqp from car) and residuals against fitted
  # values.
  library(car)
  par(mfrow = c(1, 2))
  qqp(resid(dat2.lmer7a))
  plot(x = fitted(dat2.lmer7a), y = resid(dat2.lmer7a))

  # 95% bootstrap confidence intervals from 1000 simulations; the outer
  # parentheses print the stored result.
  library(boot)
  (bs.lmer7a <- confint(
    object = dat2.lmer7a,
    level = 0.95,
    method = "boot",
    nsim = 1000
  ))
#                                            2.5 %    97.5 %
# .sig01                                  0.00e+00  0.040543
# .sig02                                  1.41e-05  0.021509
# .sig03                                  9.75e-02  0.190691
# .sigma                                  1.71e-01  0.189190
# (Intercept)                             3.83e+00  4.994796
# RTtoPrime.log                           1.74e-01  0.302521
# ResponseToPrimeincorrect                8.53e-01  2.255276
# PrevRT.log                              3.66e-02  0.157534
# BaseFrequency                          -1.76e-02  0.000579
# Conditionheid                          -6.73e-02 -0.010342
# RTtoPrime.log:ResponseToPrimeincorrect -3.19e-01 -0.112836
  [...] lmer to conduct mixed-effects regression
  [...] lmer and why these are essential when you have multiple responses per subject or item
  Thank you for your attention!