STATISTICS COURSE - LAB SESSION 2

Martijn Wieling, http://www.martijnwieling.nl

## Generated on: December 25, 2014 - 11:07:18
# A0. Start R 3.1.2
# Start R and make sure the libraries lmer and car are installed
# Menu: Packages -> Install package(s)...  -> Select mirror -> Select packages
# or: 
# install.packages(c("lme4","car","gplots"),repos='http://cran.r-project.org')

# A1. These commands should work when the packages are installed
library(lme4)
library(car)
library(gplots)

# version information
R.version.string
## [1] "R version 3.1.2 (2014-10-31)"
packageVersion('lme4')
## [1] '1.1.7'
packageVersion('car')
## [1] '2.0.22'
packageVersion('gplots')
## [1] '2.15.0'
# A2. Now download the following files and load the custom functions and data
download.file('http://www.let.rug.nl/wieling/statscourse/lecture2/lab/myFunctions.R', 'myFunctions.R')
download.file('http://www.let.rug.nl/wieling/statscourse/lecture2/lab/eye.rda', 'eye.rda')
download.file('http://www.let.rug.nl/wieling/statscourse/lecture2/lab/eyeAll.rda', 'eyeAll.rda')

source('myFunctions.R') # custom functions
load('eye.rda') # data for mixed-effects regression
load('eyeAll.rda') # data for making focus plots

# A3. data investigation
head(eye)
##    Subject   Item TargetDefinite TargetNeuter TargetColor TargetBrown TargetPlace TargetTopRight CompColor CompPlace
## 1     S300  appel              1            0         red           0           1              0       red         2
## 4     S300    vat              0            1       brown           1           1              0     brown         3
## 5     S300   boek              1            1        blue           0           4              0      blue         3
## 8     S300  kanon              0            1       green           0           2              1     green         1
## 9     S300   glas              1            1       green           0           4              0     green         1
## 12    S300 varken              0            1       brown           1           2              1     brown         4
##    TupCdown CupTdown TrialID Age IsMale Edulevel SameColor SameGender TargetPerc  CompPerc FocusDiff
## 1         0        0      44  52      0        1         1          1   40.90909  6.818182  34.09091
## 4         1        0      43  52      0        1         1          0   27.90698  9.302326  18.60465
## 5         0        0       5  52      0        1         1          0   11.11111 25.000000 -13.88889
## 8         0        0      34  52      0        1         1          1   72.09302 27.906977  44.18605
## 9         0        1      32  52      0        1         1          1   59.52381  4.761905  54.76190
## 12        1        0      31  52      0        1         1          0   52.38095  0.000000  52.38095
str(eye)
## 'data.frame':    1102 obs. of  21 variables:
##  $ Subject       : Factor w/ 54 levels "S300","S301",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Item          : Factor w/ 48 levels "anker","appel",..: 2 42 7 19 13 41 26 43 9 29 ...
##  $ TargetDefinite: num  1 0 1 0 1 0 1 0 1 0 ...
##  $ TargetNeuter  : num  0 1 1 1 1 1 1 1 1 1 ...
##  $ TargetColor   : Factor w/ 5 levels "blue","brown",..: 4 2 1 3 3 2 4 1 2 3 ...
##  $ TargetBrown   : num  0 1 0 0 0 1 0 0 1 0 ...
##  $ TargetPlace   : Factor w/ 4 levels "1","2","3","4": 1 1 4 2 4 2 1 2 1 4 ...
##  $ TargetTopRight: num  0 0 0 1 0 1 0 1 0 0 ...
##  $ CompColor     : Factor w/ 5 levels "blue","brown",..: 4 2 1 3 3 2 4 1 2 3 ...
##  $ CompPlace     : Factor w/ 4 levels "1","2","3","4": 2 3 3 1 1 4 4 3 3 1 ...
##  $ TupCdown      : num  0 1 0 0 0 1 1 1 1 0 ...
##  $ CupTdown      : num  0 0 0 0 1 0 0 0 0 1 ...
##  $ TrialID       : int  44 43 5 34 32 31 26 8 22 44 ...
##  $ Age           : int  52 52 52 52 52 52 52 52 52 52 ...
##  $ IsMale        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Edulevel      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ SameColor     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ SameGender    : num  1 0 0 1 1 0 0 1 1 0 ...
##  $ TargetPerc    : num  40.9 27.9 11.1 72.1 59.5 ...
##  $ CompPerc      : num  6.82 9.3 25 27.91 4.76 ...
##  $ FocusDiff     : num  34.1 18.6 -13.9 44.2 54.8 ...
nrow(eye) # 1102 Subject-Item combinations (only when SameColor==True)
## [1] 1102
plot(density(eye$Age)) # e.g., to investigate distributions

# EXERCISE 1. Investigate eyeAll yourself

# ANSWER 1:
head(eyeAll)
##   Subject  Item Definiteness GenderComp TargetGender TargetPlace TimeOnsetDet TimeOnsetAdj TimeOnsetNoun  Time Target
## 1    S300 anker     Definite       same       Neuter           3          800          978          1559  0.00      0
## 2    S300 anker     Definite       same       Neuter           3          800          978          1559 16.67      0
## 3    S300 anker     Definite       same       Neuter           3          800          978          1559 33.34      0
## 4    S300 anker     Definite       same       Neuter           3          800          978          1559 50.01      0
## 5    S300 anker     Definite       same       Neuter           3          800          978          1559 66.68      0
## 6    S300 anker     Definite       same       Neuter           3          800          978          1559 83.35      0
##   Comp Dist  AOI ColorComp TargetColor
## 1    0    0 <NA>      same      yellow
## 2    0    0 <NA>      same      yellow
## 3    0    0 <NA>      same      yellow
## 4    0    0 <NA>      same      yellow
## 5    0    0 <NA>      same      yellow
## 6    0    0 <NA>      same      yellow
str(eyeAll)
## 'data.frame':    192922 obs. of  16 variables:
##  $ Subject      : Factor w/ 54 levels "S300","S301",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Item         : Factor w/ 48 levels "anker","appel",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Definiteness : Factor w/ 2 levels "Definite","Indefinite": 1 1 1 1 1 1 1 1 1 1 ...
##  $ GenderComp   : Factor w/ 2 levels "different","same": 2 2 2 2 2 2 2 2 2 2 ...
##  $ TargetGender : Factor w/ 2 levels "Common","Neuter": 2 2 2 2 2 2 2 2 2 2 ...
##  $ TargetPlace  : Factor w/ 4 levels "1","2","3","4": 3 3 3 3 3 3 3 3 3 3 ...
##  $ TimeOnsetDet : num  800 800 800 800 800 800 800 800 800 800 ...
##  $ TimeOnsetAdj : num  978 978 978 978 978 978 978 978 978 978 ...
##  $ TimeOnsetNoun: num  1559 1559 1559 1559 1559 ...
##  $ Time         : num  0 16.7 33.3 50 66.7 ...
##  $ Target       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Comp         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Dist         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ AOI          : Factor w/ 4 levels "1","2","3","4": NA NA NA NA NA NA NA NA NA NA ...
##  $ ColorComp    : Factor w/ 2 levels "different","same": 2 2 2 2 2 2 2 2 2 2 ...
##  $ TargetColor  : Factor w/ 5 levels "blue","brown",..: 5 5 5 5 5 5 5 5 5 5 ...
nrow(eyeAll)
## [1] 192922
summary(eyeAll)
##     Subject              Item            Definiteness       GenderComp    TargetGender   TargetPlace  TimeOnsetDet
##  S323   :  8591   sigaar   :  4475   Definite  :97095   different:96577   Common:97416   1:47658     Min.   :800  
##  S319   :  8423   vogel    :  4355   Indefinite:95827   same     :96345   Neuter:95506   2:48808     1st Qu.:800  
##  S305   :  7803   varken   :  4304                                                       3:48485     Median :800  
##  S310   :  7434   kanon    :  4291                                                       4:47971     Mean   :800  
##  S300   :  7383   anker    :  4245                                                                   3rd Qu.:800  
##  S312   :  7351   vliegtuig:  4229                                                                   Max.   :800  
##  (Other):145937   (Other)  :167023                                                                                
##   TimeOnsetAdj  TimeOnsetNoun       Time            Target            Comp             Dist           AOI       
##  Min.   : 900   Min.   :1410   Min.   :   0.0   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   1   :37093  
##  1st Qu.: 976   1st Qu.:1521   1st Qu.: 600.1   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000   2   :38032  
##  Median :1000   Median :1538   Median :1200.2   Median :0.0000   Median :0.0000   Median :0.00000   3   :28550  
##  Mean   :1002   Mean   :1531   Mean   :1226.1   Mean   :0.3597   Mean   :0.1585   Mean   :0.07899   4   :26772  
##  3rd Qu.:1031   3rd Qu.:1550   3rd Qu.:1817.0   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.00000   NA's:62475  
##  Max.   :1100   Max.   :1578   Max.   :4084.2   Max.   :1.0000   Max.   :1.0000   Max.   :0.50000               
##                                                                                                                 
##      ColorComp      TargetColor   
##  different:     0   blue  :40580  
##  same     :192922   brown :32120  
##                     green :40128  
##                     red   :40290  
##                     yellow:39804  
##                                   
## 
# A4. Investigating focus plots
myFocusPlot(eyeAll, usepdf=F) 

# A5. We will now investigate a split based on definiteness as well
myFocusPlot(eyeAll, splitDefiniteness=T, usepdf=F)

# A6. It is possible to visualize a certain time window
myFocusPlot(eyeAll, startTime="TimeOnsetNoun", endTime="TimeOnsetNoun+200", usepdf=F)

myFocusPlot(eyeAll, startTime=500, endTime=2000, usepdf=F)

# A7. We can also focus on the Competitor vs. Distractors
myFocusPlot(eyeAll, startTime=500, endTime=2000, plotTarget=F, usepdf=F)

###### MIXED-EFFECTS REGRESSION: RANDOM INTERCEPTS AND SLOPES ######

# B1. We create our first multiple regression model (no fixed-effects yet)
model  = lmer( FocusDiff ~ (1|Subject) + (1|Item) , data = eye )

# B2. Look at the results of the model, note the 0 variance of the Items
summary(model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: FocusDiff ~ (1 | Subject) + (1 | Item)
##    Data: eye
## 
## REML criterion at convergence: 12267.1
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.84824 -0.79939  0.01522  0.82899  1.64878 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Item     (Intercept)    0.00   0.000  
##  Subject  (Intercept)   67.45   8.213  
##  Residual             3964.98  62.968  
## Number of obs: 1102, groups:  Item, 48; Subject, 28
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)    5.681      2.486   2.285
# B3. Plot the random intercepts
myInterceptPlot(eye,"FocusDiff","TrialID","Subject",model) # for subjects

x11() # use quartz() when on Mac OS
myInterceptPlot(eye,"FocusDiff","TrialID","Item",model) # for items

# B4. Is a by-item analysis necessary?
model1 = lmer(FocusDiff ~ (1|Subject),  data=eye)
model2 = lmer(FocusDiff ~ (1|Subject) + (1|Item),  data=eye)
AIC(model1) - AIC(model2) # < 2, so no
## [1] -2
# B5. Trying to add a fixed effect factor for Color
# Note this will return a warning as the column is constant in this subset
model3 = lmer(FocusDiff ~ SameColor + (1|Subject), data=eye)
## fixed-effect model matrix is rank deficient so dropping 1 column / coefficient
# B6. Adding the contrast between men and women: IsMale
model4 = lmer(FocusDiff ~ IsMale + (1|Subject), data=eye)
summary(model4)
## Linear mixed model fit by REML ['lmerMod']
## Formula: FocusDiff ~ IsMale + (1 | Subject)
##    Data: eye
## 
## REML criterion at convergence: 12258.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.86973 -0.78928  0.01855  0.83985  1.68155 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept)   55.73   7.465  
##  Residual             3962.86  62.951  
## Number of obs: 1102, groups:  Subject, 28
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)    9.712      3.237   3.001
## IsMale        -9.057      4.810  -1.883
## 
## Correlation of Fixed Effects:
##        (Intr)
## IsMale -0.673
# It is close to absolute value of 2, so we keep it in for now,
# if it is n.s. in our final model, we drop it.

# EXERCISE 2: Why does it not make sense to vary the effect of IsMale 
#             per Subject as a random slope? 
#             Would it make sense to vary it per item instead?
#             (If Item would have been required as a random-effect factor.) 

# ANSWER 2: IsMale is a characteristic of the Subject and it is 
#           therefore constant for all measurements associated within one subject.
#           It would make sense (if item would have been included as a random-effect factor)
#           to vary the influence of gender per item (as males and females would have responded
#           to one item). 

# B7. We investigate the effect of gender competitor and target gender:
model5 = lmer(FocusDiff ~ IsMale + TargetNeuter + SameGender + (1|Subject), 
              data=eye)
summary(model5) # both not significant
## Linear mixed model fit by REML ['lmerMod']
## Formula: FocusDiff ~ IsMale + TargetNeuter + SameGender + (1 | Subject)
##    Data: eye
## 
## REML criterion at convergence: 12248.8
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.87591 -0.77863  0.01876  0.84407  1.67588 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept)   55.18   7.428  
##  Residual             3967.14  62.985  
## Number of obs: 1102, groups:  Subject, 28
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept)   11.8475     4.1705   2.841
## IsMale        -9.0466     4.8032  -1.884
## TargetNeuter  -3.5062     3.7990  -0.923
## SameGender    -0.9019     3.7971  -0.238
## 
## Correlation of Fixed Effects:
##             (Intr) IsMale TrgtNt
## IsMale      -0.522              
## TargetNeutr -0.432 -0.003       
## SameGender  -0.456  0.004 -0.012
# B8. We investigate the interaction between gender competitor 
#     and target gender:
model6 = lmer(FocusDiff ~ IsMale + TargetNeuter * SameGender + (1|Subject), 
              data=eye)
summary(model6) # both significant, as well as their interaction
## Linear mixed model fit by REML ['lmerMod']
## Formula: FocusDiff ~ IsMale + TargetNeuter * SameGender + (1 | Subject)
##    Data: eye
## 
## REML criterion at convergence: 12232.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9799 -0.7914  0.0429  0.8148  1.7613 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept)   56.06   7.488  
##  Residual             3931.26  62.700  
## Number of obs: 1102, groups:  Subject, 28
## 
## Fixed effects:
##                         Estimate Std. Error t value
## (Intercept)               17.828      4.542   3.925
## IsMale                    -9.043      4.804  -1.883
## TargetNeuter             -16.033      5.370  -2.985
## SameGender               -12.894      5.255  -2.454
## TargetNeuter:SameGender   24.852      7.564   3.286
## 
## Correlation of Fixed Effects:
##             (Intr) IsMale TrgtNt SmGndr
## IsMale      -0.479                     
## TargetNeutr -0.562 -0.002              
## SameGender  -0.578  0.003  0.487       
## TrgtNtr:SmG  0.400  0.001 -0.710 -0.695
# B9. Preparing for the test if the interaction is necessary 
#     (note the REML=F as we are comparing fixed effects) 
model4b = lmer(FocusDiff ~ IsMale + (1|Subject), data=eye, REML=F)
model6b = lmer(FocusDiff ~ IsMale + TargetNeuter * SameGender + (1|Subject), 
               data=eye, REML=F)

# EXERCISE 3. Do we need the interaction (i.e. compare model4b and model6b)?

# ANSWER 3:
AIC(model4b) - AIC(model6b) # > 2, so we need the interaction
## [1] 5.691867
# B10. We need to test if their inclusion as a random slope is necessary 
# and influences the significance as fixed effects (in the interaction or 
# separately).
model6 = lmer(FocusDiff ~ IsMale + TargetNeuter * SameGender + (1|Subject), 
              data=eye)
model7 = lmer(FocusDiff ~ IsMale + TargetNeuter * SameGender + 
              (1 + TargetNeuter * SameGender|Subject), data=eye)

summary(model7)
## Linear mixed model fit by REML ['lmerMod']
## Formula: FocusDiff ~ IsMale + TargetNeuter * SameGender + (1 + TargetNeuter *      SameGender | Subject)
##    Data: eye
## 
## REML criterion at convergence: 12227.6
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.14593 -0.78363  0.03436  0.81166  1.85200 
## 
## Random effects:
##  Groups   Name                    Variance Std.Dev. Corr             
##  Subject  (Intercept)              242.4   15.57                     
##           TargetNeuter             260.7   16.14    -0.66            
##           SameGender               359.9   18.97    -0.94  0.88      
##           TargetNeuter:SameGender  354.0   18.81     0.77 -0.99 -0.94
##  Residual                         3864.3   62.16                     
## Number of obs: 1102, groups:  Subject, 28
## 
## Fixed effects:
##                         Estimate Std. Error t value
## (Intercept)               17.683      5.243   3.372
## IsMale                    -7.746      4.794  -1.616
## TargetNeuter             -15.894      6.200  -2.564
## SameGender               -13.416      6.394  -2.098
## TargetNeuter:SameGender   24.814      8.357   2.969
## 
## Correlation of Fixed Effects:
##             (Intr) IsMale TrgtNt SmGndr
## IsMale      -0.414                     
## TargetNeutr -0.612  0.000              
## SameGender  -0.720  0.002  0.602       
## TrgtNtr:SmG  0.506 -0.001 -0.771 -0.749
AIC(model6) - AIC(model7) # not better
## [1] -13.50886
# B11. Add the factor TargetColor to the best previous model (model6)
model8 = lmer(FocusDiff ~ IsMale + TargetNeuter * SameGender + TargetColor + 
                          (1|Subject), data=eye)
summary(model8) # it appears we need a contrast for brown
## Linear mixed model fit by REML ['lmerMod']
## Formula: FocusDiff ~ IsMale + TargetNeuter * SameGender + TargetColor +      (1 | Subject)
##    Data: eye
## 
## REML criterion at convergence: 12201.1
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.02331 -0.77719  0.02425  0.80454  1.92641 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept)   54.22   7.363  
##  Residual             3908.80  62.520  
## Number of obs: 1102, groups:  Subject, 28
## 
## Fixed effects:
##                         Estimate Std. Error t value
## (Intercept)               18.081      5.835   3.099
## IsMale                    -8.896      4.768  -1.866
## TargetNeuter             -16.046      5.356  -2.996
## SameGender               -12.778      5.241  -2.438
## TargetColorbrown         -13.826      6.239  -2.216
## TargetColorgreen           4.879      5.782   0.844
## TargetColorred             2.196      5.791   0.379
## TargetColoryellow          1.937      5.877   0.330
## TargetNeuter:SameGender   24.553      7.545   3.254
## 
## Correlation of Fixed Effects:
##             (Intr) IsMale TrgtNt SmGndr TrgtClrb TrgtClrg TrgtClrr TrgtClry
## IsMale      -0.364                                                         
## TargetNeutr -0.447 -0.003                                                  
## SameGender  -0.463  0.003  0.487                                           
## TrgtClrbrwn -0.467 -0.010  0.012  0.013                                    
## TargtClrgrn -0.512  0.009  0.012  0.024  0.466                             
## TargetClrrd -0.498 -0.020  0.010  0.012  0.466    0.502                    
## TrgtClryllw -0.498 -0.013  0.019  0.024  0.459    0.495    0.495           
## TrgtNtr:SmG  0.321  0.000 -0.710 -0.695 -0.005   -0.021   -0.005   -0.020
eye$TargetBrown = (eye$TargetColor == "brown")*1 # 1 if brown, 0 otherwise

# EXERCISE 4. Add the contrast TargetBrown to the best previous model (model6)
#             and test if the model is better than the more complex model (model8).
#             Store the model as model9.

# ANSWER 4:
model9 = lmer(FocusDiff ~ IsMale + TargetNeuter * SameGender + TargetBrown + 
               (1|Subject), data=eye)
summary(model9) # highly significant
## Linear mixed model fit by REML ['lmerMod']
## Formula: FocusDiff ~ IsMale + TargetNeuter * SameGender + TargetBrown +      (1 | Subject)
##    Data: eye
## 
## REML criterion at convergence: 12217.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.02852 -0.79091  0.02425  0.80506  1.92887 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept)   54.7    7.396  
##  Residual             3900.3   62.453  
## Number of obs: 1102, groups:  Subject, 28
## 
## Fixed effects:
##                         Estimate Std. Error t value
## (Intercept)               20.416      4.592   4.446
## IsMale                    -8.949      4.770  -1.876
## TargetNeuter             -16.090      5.349  -3.008
## SameGender               -12.876      5.234  -2.460
## TargetBrown              -16.090      5.126  -3.139
## TargetNeuter:SameGender   24.678      7.534   3.275
## 
## Correlation of Fixed Effects:
##             (Intr) IsMale TrgtNt SmGndr TrgtBr
## IsMale      -0.470                            
## TargetNeutr -0.554 -0.002                     
## SameGender  -0.569  0.003  0.487              
## TargetBrown -0.180 -0.006  0.003 -0.001       
## TrgtNtr:SmG  0.393  0.001 -0.710 -0.695  0.007
# comparing fixed effects, so REML=F
model8b = lmer(FocusDiff ~ IsMale + TargetNeuter * SameGender + TargetColor + 
               (1|Subject), data=eye, REML=F)
model9b = lmer(FocusDiff ~ IsMale + TargetNeuter * SameGender + TargetBrown + 
               (1|Subject), data=eye, REML=F)
AIC(model9b) - AIC(model8b) # negative
## [1] -5.268666
# so there is no support for the MORE COMPLEX model
# and we'll stay with the simplest model (model9)

# B12. To get an estimate of the performance of your model
#      (including the variation explained by the random structure) use:
cor(eye$FocusDiff, fitted(model9))^2
## [1] 0.04634835
# EXERCISE 5. Investigate if other parameters (e.g., age) included in the 
#             dataset (colnames(eye) shows the columnnames) should be included
#             as fixed effects. When including a variable as fixed effect
#             always test if it also has to be included as a random slope
#             (if it varies per subject). 

# ANSWER 5: (example)
colnames(eye)
##  [1] "Subject"        "Item"           "TargetDefinite" "TargetNeuter"   "TargetColor"    "TargetBrown"   
##  [7] "TargetPlace"    "TargetTopRight" "CompColor"      "CompPlace"      "TupCdown"       "CupTdown"      
## [13] "TrialID"        "Age"            "IsMale"         "Edulevel"       "SameColor"      "SameGender"    
## [19] "TargetPerc"     "CompPerc"       "FocusDiff"
summary(model10 <- lmer(FocusDiff ~ IsMale + TargetNeuter * SameGender + TargetBrown + Age + 
                                    (1|Subject), data=eye)) # Age n.s.
## Linear mixed model fit by REML ['lmerMod']
## Formula: FocusDiff ~ IsMale + TargetNeuter * SameGender + TargetBrown +      Age + (1 | Subject)
##    Data: eye
## 
## REML criterion at convergence: 12215.1
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.04654 -0.79007  0.02906  0.80377  1.92439 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept)   46.91   6.849  
##  Residual             3900.01  62.450  
## Number of obs: 1102, groups:  Subject, 28
## 
## Fixed effects:
##                         Estimate Std. Error t value
## (Intercept)              45.2689    16.5992   2.727
## IsMale                   -7.3601     4.7466  -1.551
## TargetNeuter            -16.0363     5.3488  -2.998
## SameGender              -12.9387     5.2335  -2.472
## TargetBrown             -16.1432     5.1253  -3.150
## Age                      -0.5362     0.3436  -1.560
## TargetNeuter:SameGender  24.6762     7.5336   3.275
## 
## Correlation of Fixed Effects:
##             (Intr) IsMale TrgtNt SmGndr TrgtBr Age   
## IsMale       0.076                                   
## TargetNeutr -0.146 -0.001                            
## SameGender  -0.165  0.001  0.487                     
## TargetBrown -0.054 -0.007  0.003 -0.001              
## Age         -0.962 -0.208 -0.007  0.008  0.005       
## TrgtNtr:SmG  0.109  0.001 -0.710 -0.695  0.007  0.000
summary(model10 <- lmer(FocusDiff ~ IsMale + TargetNeuter * SameGender + TargetBrown + TargetTopRight + 
                                    (1|Subject), data=eye)) # TargetTopRight sign.
## Linear mixed model fit by REML ['lmerMod']
## Formula: FocusDiff ~ IsMale + TargetNeuter * SameGender + TargetBrown +      TargetTopRight + (1 | Subject)
##    Data: eye
## 
## REML criterion at convergence: 12196.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.24775 -0.78994  0.04208  0.83402  1.95539 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept)   52.89   7.273  
##  Residual             3847.19  62.026  
## Number of obs: 1102, groups:  Subject, 28
## 
## Fixed effects:
##                         Estimate Std. Error t value
## (Intercept)               16.463      4.656   3.535
## IsMale                    -8.546      4.721  -1.810
## TargetNeuter             -16.898      5.316  -3.178
## SameGender               -12.775      5.198  -2.458
## TargetBrown              -18.804      5.135  -3.662
## TargetTopRight            17.409      4.310   4.039
## TargetNeuter:SameGender   24.729      7.483   3.305
## 
## Correlation of Fixed Effects:
##             (Intr) IsMale TrgtNt SmGndr TrgtBr TrgtTR
## IsMale      -0.463                                   
## TargetNeutr -0.535 -0.003                            
## SameGender  -0.558  0.003  0.487                     
## TargetBrown -0.147 -0.009  0.008 -0.002              
## TargtTpRght -0.210  0.021 -0.038  0.005 -0.131       
## TrgtNtr:SmG  0.384  0.001 -0.710 -0.695  0.007  0.002
# center TargetTopRight
summary(model10a <- lmer(FocusDiff ~ IsMale + TargetNeuter * SameGender + TargetBrown + TargetTopRight + 
                                     (1+TargetTopRight|Subject), data=eye)) # TargetTopRight still sign.
## Linear mixed model fit by REML ['lmerMod']
## Formula: FocusDiff ~ IsMale + TargetNeuter * SameGender + TargetBrown +  
##     TargetTopRight + (1 + TargetTopRight | Subject)
##    Data: eye
## 
## REML criterion at convergence: 12182.6
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.36221 -0.79803  0.05601  0.79214  2.08252 
## 
## Random effects:
##  Groups   Name           Variance Std.Dev. Corr 
##  Subject  (Intercept)      87.87   9.374        
##           TargetTopRight  752.06  27.424   -0.58
##  Residual                3711.30  60.920        
## Number of obs: 1102, groups:  Subject, 28
## 
## Fixed effects:
##                         Estimate Std. Error t value
## (Intercept)               16.122      4.758   3.389
## IsMale                    -9.330      4.781  -1.951
## TargetNeuter             -15.854      5.247  -3.022
## SameGender               -12.691      5.107  -2.485
## TargetBrown              -19.096      5.060  -3.774
## TargetTopRight            18.876      6.819   2.768
## TargetNeuter:SameGender   24.736      7.353   3.364
## 
## Correlation of Fixed Effects:
##             (Intr) IsMale TrgtNt SmGndr TrgtBr TrgtTR
## IsMale      -0.458                                   
## TargetNeutr -0.514 -0.006                            
## SameGender  -0.536  0.002  0.484                     
## TargetBrown -0.141 -0.010  0.007 -0.002              
## TargtTpRght -0.301  0.018 -0.026  0.003 -0.080       
## TrgtNtr:SmG  0.369  0.001 -0.706 -0.695  0.008  0.002
AIC(model10) - AIC(model10a) # 10a is better: random slope necessary
## [1] 9.617617
# B13. Model criticism (on the basis of model9)
qqp(resid(model9)) # not normal

# no heteroskedasticity, but this is due to the bounded nature of the 
# dependent variable (always between -100 and 100)
plot(scale(resid(model9)) ~ fitted(model9), pch='.',cex=2, ylim=c(-5,5))
abline(h = c(-2.5, 2.5))

eye2 = eye[ abs(scale(resid(model9))) < 2 , ]
(1-(nrow(eye2))/nrow(eye))*100 # 0.36% removed
## [1] 0.3629764
model9.v2 = lmer(FocusDiff ~ IsMale + TargetNeuter * SameGender + TargetBrown + 
               (1|Subject), data=eye2)
cor(eye$FocusDiff, fitted(model9))^2 
## [1] 0.04634835
cor(eye2$FocusDiff, fitted(model9.v2))^2 # improved prediction
## [1] 0.0573272
qqp(resid(model9.v2)) # unfortunately, residuals not better 

# The problem with the residuals might be caused by by our choice of 
# dependent variable here (which is essentially bounded by the 
# time span we are looking at), so a better approach would 
# be to predict for every point in time if someone is looking at
# the target or not (or a certain distance from the target), 
# and then take into account time in the 
# analysis. We will illustrate an analysis which takes into
# account a time-signal in the fourth lecture of this course.


Replication of the analysis

To run all analyses yourself, you first have to install pandoc, and then you can just copy the following lines to the most recent version of R. You need the packages ‘lme4’, ‘car’, ‘gplots’ and ‘rmarkdown’. If these are not installed (the library commands will throw an error), you can uncomment (i.e. remove the hashtag) the first four lines to install them. (Note that you can ignore the warnings.)

#install.packages('lme4',repos='http://cran.us.r-project.org')
#install.packages('car',repos='http://cran.us.r-project.org')
#install.packages('gplots',repos='http://cran.us.r-project.org')
#install.packages('rmarkdown',repos='http://cran.us.r-project.org')
download.file('http://www.let.rug.nl/wieling/statscourse/lecture2/lab/answers/lab-including-answers.Rmd', 'lab-including-answers.Rmd')
library(rmarkdown)
render('lab-including-answers.Rmd') # generates html file with results
browseURL(paste('file://', file.path(getwd(),'lab-including-answers.html'), sep='')) # shows result