
Martijn Wieling
University of Groningen
difference (in \(s\)) | \(n\) | \(p\) |
---|---|---|
0.01 | 40,000 | 0.05 |
0.10 | 400 | 0.05 |
0.25 | 64 | 0.05 |
0.54 | 16 | 0.05 |
load("dat.rda")
head(dat)
# Speaker Language PronDist PronDistCat LangDist LangDistAlt Age Sex AEO LR NrLang
# 1 arabic1 arabic 0.185727 Different 0.63699 0.44864 38 F 12 4 0
# 2 arabic10 arabic -0.172175 Similar 0.63699 0.44864 26 M 5 2 2
# 3 arabic13 arabic -0.035423 Similar 0.63699 0.44864 25 M 15 1 2
# 4 arabic12 arabic 0.372547 Different 0.63699 0.44864 32 M 11 8 0
# 5 arabic17 arabic -0.175237 Similar 0.63699 0.44864 35 M 15 0 1
# 6 arabic18 arabic 0.168120 Different 0.63699 0.44864 18 M 6 0 1
str(dat)
# 'data.frame': 712 obs. of 11 variables:
# $ Speaker : Factor w/ 712 levels "afrikaans1","afrikaans2",..: 21 22 25 24 27 28 26 30 31 23 ...
# $ Language : Factor w/ 159 levels "afrikaans","agni",..: 7 7 7 7 7 7 7 7 7 7 ...
# $ PronDist : num 0.1857 -0.1722 -0.0354 0.3725 -0.1752 ...
# $ PronDistCat: Factor w/ 2 levels "Different","Similar": 1 2 2 1 2 1 1 2 2 2 ...
# $ LangDist : num 0.637 0.637 0.637 0.637 0.637 ...
# $ LangDistAlt: num 0.449 0.449 0.449 0.449 0.449 ...
# $ Age : num 38 26 25 32 35 18 22 36 23 30 ...
# $ Sex : Factor w/ 2 levels "F","M": 1 2 2 2 2 2 2 2 1 1 ...
# $ AEO : num 12 5 15 11 15 6 16 12 10 14 ...
# $ LR : num 4 2 1 8 0 0 0 1 0 4 ...
# $ NrLang : int 0 2 2 0 1 1 2 2 2 1 ...
R
, corrects for this)german <- droplevels(dat[dat$Language == "german", ])
boxplot(german$PronDist)
abline(h = 0, col = "red", lty = 2)
t.test(german$PronDist, mu = 0)
#
# One Sample t-test
#
# data: german$PronDist
# t = -5.33, df = 21, p-value = 2.7e-05
# alternative hypothesis: true mean is not equal to 0
# 95 percent confidence interval:
# -0.208787 -0.091657
# sample estimates:
# mean of x
# -0.15022
library(lsr)
cohensD(german$PronDist, mu = 0)
# [1] 1.1373
library(swirl)
install_from_swirl("Mathematical_Biostatistics_Boot_Camp")
swirl()
in RStudio and finish the following lesson of the Mathematical Biostatistics Boot Camp course:
# aggregate data per language (159 languages)
lang <- aggregate(cbind(LangDist, LangDistAlt) ~ Language, data = dat, FUN = mean)
par(mfrow = c(1, 2))
boxplot(lang[, c("LangDist", "LangDistAlt")])
boxplot(lang$LangDist - lang$LangDistAlt, main = "Pairwise differences")
t.test(lang$LangDist, lang$LangDistAlt, paired = T)
#
# Paired t-test
#
# data: lang$LangDist and lang$LangDistAlt
# t = -3.73, df = 158, p-value = 0.00027
# alternative hypothesis: true mean difference is not equal to 0
# 95 percent confidence interval:
# -0.085703 -0.026367
# sample estimates:
# mean difference
# -0.056035
t.test(lang$LangDist - lang$LangDistAlt, mu = 0) # identical to one-sample test of differences
#
# One Sample t-test
#
# data: lang$LangDist - lang$LangDistAlt
# t = -3.73, df = 158, p-value = 0.00027
# alternative hypothesis: true mean is not equal to 0
# 95 percent confidence interval:
# -0.085703 -0.026367
# sample estimates:
# mean of x
# -0.056035
cohensD(lang$LangDist, lang$LangDistAlt, method = "paired") # effect size
# [1] 0.29585
rusger <- droplevels(dat[dat$Language %in% c("russian", "german"), ])
boxplot(PronDist ~ Language, data = rusger)
t.test(PronDist ~ Language, data = rusger, alternative = "two.sided")
#
# Welch Two Sample t-test
#
# data: PronDist by Language
# t = -3.56, df = 42.5, p-value = 0.00092
# alternative hypothesis: true difference in means between group german and group russian is not equal to 0
# 95 percent confidence interval:
# -0.267719 -0.074108
# sample estimates:
# mean in group german mean in group russian
# -0.150222 0.020691
cohensD(PronDist ~ Language, data = rusger)
# [1] 1.0166
library(car)
leveneTest(PronDist ~ Language, data = rusger)
# Levene's Test for Homogeneity of Variance (center = median)
# Df F value Pr(>F)
# group 1 5 0.03 *
# 45
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
russian <- droplevels(dat[dat$Language == "russian", ])
qqnorm(russian$PronDist) # plot actual values vs. theoretical quantiles
qqline(russian$PronDist) # plot reference line of normal distribution
shapiro.test(russian$PronDist)
#
# Shapiro-Wilk normality test
#
# data: russian$PronDist
# W = 0.958, p-value = 0.38
qqnorm(german$PronDist)
qqline(german$PronDist)
shapiro.test(german$PronDist)
#
# Shapiro-Wilk normality test
#
# data: german$PronDist
# W = 0.929, p-value = 0.12
wilcox.test
(similar to t.test
)par(mfrow = c(1, 2)) # visualization indicates non-parametric test necessary
qqnorm(russian$PronDist, main = "russian")
qqline(russian$PronDist)
qqnorm(german$PronDist, main = "german")
qqline(german$PronDist)
(model <- wilcox.test(PronDist ~ Language, data = rusger)) # default 2-tailed
#
# Wilcoxon rank sum exact test
#
# data: PronDist by Language
# W = 140, p-value = 0.0035
# alternative hypothesis: true location shift is not equal to 0
wilcox.effsize <- function(pval2tailed, N) {
(r <- abs(qnorm(pval2tailed/2)/sqrt(N))) # r = z / sqrt(N)
}
# rough guideline: r around 0.1 (small), > 0.3 (medium), > 0.5: large
wilcox.effsize(model$p.value, nrow(rusger))
# [1] 0.42616
# visualization indicates non-parametric necessary
qqnorm(german$PronDist)
qqline(german$PronDist)
(model <- wilcox.test(german$PronDist, mu = 0))
#
# Wilcoxon signed rank exact test
#
# data: german$PronDist
# V = 20, p-value = 0.00018
# alternative hypothesis: true location is not equal to 0
wilcox.effsize(model$p.value, nrow(german))
# [1] 0.79948
# No non-parametric test necessary
qqnorm(lang$LangDist - lang$LangDistAlt)
qqline(lang$LangDist - lang$LangDistAlt)
(model <- wilcox.test(lang$LangDist, lang$LangDistAlt, paired = TRUE))
#
# Wilcoxon signed rank test with continuity correction
#
# data: lang$LangDist and lang$LangDistAlt
# V = 4362, p-value = 0.00059
# alternative hypothesis: true location shift is not equal to 0
wilcox.effsize(model$p.value, nrow(lang))
# [1] 0.27242
languages <- c("farsi", "swedish", "polish")
dat3 <- droplevels(dat[dat$Language %in% languages, ])
(tab <- table(dat3$PronDistCat, dat3$Language))
#
# farsi polish swedish
# Different 6 6 1
# Similar 4 5 9
chisq.test(tab)
#
# Pearson's Chi-squared test
#
# data: tab
# X-squared = 6.25, df = 2, p-value = 0.044
cramersV(tab) # from library(lsr)
# [1] 0.4489
chisq.test(tab)$expected # warning as not all expected values >= 5
# Warning in chisq.test(tab): Chi-squared approximation may be incorrect
#
# farsi polish swedish
# Different 4.1935 4.6129 4.1935
# Similar 5.8065 6.3871 5.8065
fisher.test(tab) # solution: use Fisher's exact test as the appropriate alternative
#
# Fisher's Exact Test for Count Data
#
# data: tab
# p-value = 0.053
# alternative hypothesis: two.sided
# start with visualization
boxplot(PronDist ~ Language, data = dat3)
result <- aov(PronDist ~ Language, data = dat3)
# alternative if variances are not equal: oneway.test(), alternative if
# non-normal distribution: kruskal.test()
summary(result) # is the ANOVA significant?
# Df Sum Sq Mean Sq F value Pr(>F)
# Language 2 0.213 0.1067 4.16 0.026 *
# Residuals 28 0.718 0.0256
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
etaSquared(result) # from library(lsr); small: 0.02, medium: 0.13, large: 0.26
# eta.sq eta.sq.part
# Language 0.22908 0.22908
posthocPairwiseT(result) # from library(lsr)
#
# Pairwise comparisons using t tests with pooled SD
#
# data: PronDist and Language
#
# farsi polish
# polish 0.63 -
# swedish 0.04 0.06
#
# P value adjustment method: holm
# alternative: TukeyHSD(result)
leveneTest(PronDist ~ Language, data = dat3)
# Levene's Test for Homogeneity of Variance (center = median)
# Df F value Pr(>F)
# group 2 0.69 0.51
# 28
par(mfrow = c(1, 3))
for (lang in levels(dat3$Language)) {
qqnorm(dat3[dat3$Language == lang, ]$PronDist, main = lang)
qqline(dat3[dat3$Language == lang, ]$PronDist)
}
aggregate(PronDist ~ Language, data = dat3, function(x) shapiro.test(x)$p.value)
# Language PronDist
# 1 farsi 0.035895
# 2 polish 0.922943
# 3 swedish 0.040296
table(dat3$Language) # unequal sample sizes
#
# farsi polish swedish
# 10 11 10
kruskal.test(PronDist ~ Language, data = dat3)
#
# Kruskal-Wallis rank sum test
#
# data: PronDist by Language
# Kruskal-Wallis chi-squared = 6.44, df = 2, p-value = 0.04
library(PMCMR)
posthoc.kruskal.dunn.test(PronDist ~ Language, data = dat3)
#
# Pairwise comparisons using Dunn's-test for multiple
# comparisons of independent samples
#
# data: PronDist by Language
#
# farsi polish
# polish 0.634 -
# swedish 0.051 0.099
#
# P value adjustment method: holm
pairs2 <- dat3[dat3$Language %in% c("swedish", "farsi"), ]
model <- wilcox.test(PronDist ~ Language, data = pairs2)
wilcox.effsize(model$p.value, nrow(pairs2))
# [1] 0.5075
aov
): SS(A), SS(B | A), SS(A*B | B, A)
R
are not)dat2 <- droplevels(dat[dat$Language %in% c("mandarin", "dutch"), ]) # new dataset
table(dat2$Language, dat2$Sex)
#
# F M
# dutch 7 7
# mandarin 14 9
# normality OK
aggregate(PronDist ~ Language, data = dat2, function(x) shapiro.test(x)$p.value)
# Language PronDist
# 1 dutch 0.39841
# 2 mandarin 0.34953
with(dat2, interaction.plot(Language, Sex, PronDist, col = c("blue", "red"), type = "b"))
summary(aov(PronDist ~ Language * Sex, data = dat2))
# Df Sum Sq Mean Sq F value Pr(>F)
# Language 1 0.347 0.347 20.06 8.5e-05 ***
# Sex 1 0.005 0.005 0.30 0.59
# Language:Sex 1 0.089 0.089 5.17 0.03 *
# Residuals 33 0.570 0.017
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(result <- aov(PronDist ~ Sex * Language, data = dat2))
# Df Sum Sq Mean Sq F value Pr(>F)
# Sex 1 0.000 0.000 0.00 0.95
# Language 1 0.352 0.352 20.36 7.7e-05 ***
# Sex:Language 1 0.089 0.089 5.17 0.03 *
# Residuals 33 0.570 0.017
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Anova(result <- aov(PronDist ~ Language * Sex, data = dat2), type = 2) # from library(car), case sensitive!
# Anova Table (Type II tests)
#
# Response: PronDist
# Sum Sq Df F value Pr(>F)
# Language 0.352 1 20.36 7.7e-05 ***
# Sex 0.005 1 0.30 0.59
# Language:Sex 0.089 1 5.17 0.03 *
# Residuals 0.570 33
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
etaSquared(result, type = 2)
# eta.sq eta.sq.part
# Language 0.3478048 0.3815433
# Sex 0.0051338 0.0090241
# Language:Sex 0.0883463 0.1354766
op <- options(contrasts = c("contr.sum", "contr.poly")) # set orthogonal contrasts for unordered and ordered factors
Anova(result <- aov(PronDist ~ Language * Sex, data = dat2), type = 3)
# Anova Table (Type III tests)
#
# Response: PronDist
# Sum Sq Df F value Pr(>F)
# (Intercept) 0.068 1 3.92 0.05611 .
# Language 0.320 1 18.52 0.00014 ***
# Sex 0.019 1 1.07 0.30784
# Language:Sex 0.089 1 5.17 0.02959 *
# Residuals 0.570 33
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
etaSquared(result, type = 3)
# eta.sq eta.sq.part
# Language 0.316338 0.359431
# Sex 0.018328 0.031486
# Language:Sex 0.088346 0.135477
model.tables(result, type = "means")
# Tables of means
# Grand mean
#
# -0.017218
#
# Language
# dutch mandarin
# -0.1413 0.05828
# rep 14.0000 23.00000
#
# Sex
# F M
# -0.0275 -0.003726
# rep 21.0000 16.000000
#
# Language:Sex
# Sex
# Language F M
# dutch -0.216 -0.067
# rep 7.000 7.000
# mandarin 0.080 0.024
# rep 14.000 9.000
dat2$LangSex <- interaction(dat2$Language, dat2$Sex)
newresult <- aov(PronDist ~ LangSex, data = dat2)
posthocPairwiseT(newresult) # from library(lsr)
#
# Pairwise comparisons using t tests with pooled SD
#
# data: PronDist and LangSex
#
# dutch.F mandarin.F dutch.M
# mandarin.F 2e-04 - -
# dutch.M 0.125 0.086 -
# mandarin.M 0.005 0.355 0.355
#
# P value adjustment method: holm
Thank you for your attention!