
Martijn Wieling
BCN Statistics Course, University of Groningen
data(iris) # standard dataset: contains petal and sepal length and width and species of irises
cor.test(iris$Sepal.Length, iris$Petal.Length) # cor(A,B) just gives the correlation
#
# Pearson's product-moment correlation
#
# data: iris$Sepal.Length and iris$Petal.Length
# t = 21.6, df = 148, p-value <2e-16
# alternative hypothesis: true correlation is not equal to 0
# 95 percent confidence interval:
# 0.82704 0.90551
# sample estimates:
# cor
# 0.87175
# Also when residuals not normally distributed
cor.test(iris$Sepal.Length, iris$Petal.Length, method = "spearman")
#
# Spearman's rank correlation rho
#
# data: iris$Sepal.Length and iris$Petal.Length
# S = 66400, p-value <2e-16
# alternative hypothesis: true rho is not equal to 0
# sample estimates:
# rho
# 0.8819
# Similar result
iris$Sepal.Length.rank <- rank(iris$Sepal.Length)
iris$Petal.Length.rank <- rank(iris$Petal.Length)
cor.test(iris$Sepal.Length.rank, iris$Petal.Length.rank)$estimate
# cor
# 0.8819
library(corrgram)
corrgram(iris[, c("Sepal.Width", "Sepal.Length", "Petal.Length")], lower.panel = panel.shade,
upper.panel = panel.pie)
iris$Petal.Area <- iris$Petal.Width * iris$Petal.Length
model0 <- lm(Petal.Area ~ Sepal.Length, data = iris)
summary(model0)
#
# Call:
# lm(formula = Petal.Area ~ Sepal.Length, data = iris)
#
# Residuals:
# Min 1Q Median 3Q Max
# -5.343 -1.588 -0.198 1.459 6.978
#
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) -22.714 1.421 -16.0 <2e-16 ***
# Sepal.Length 4.879 0.241 20.3 <2e-16 ***
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#
# Residual standard error: 2.43 on 148 degrees of freedom
# Multiple R-squared: 0.735, Adjusted R-squared: 0.733
# F-statistic: 410 on 1 and 148 DF, p-value: <2e-16
round(model0$coefficients, 2)
# (Intercept) Sepal.Length
# -22.71 4.88
iris$FittedPA <- fitted(model0)
head(iris, 2)
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species Petal.Area FittedPA
# 1 5.1 3.5 1.4 0.2 setosa 0.28 2.1675
# 2 4.9 3.0 1.4 0.2 setosa 0.28 1.1918
model1 <- lm(Petal.Area ~ Sepal.Length + Sepal.Width, data = iris)
summary(model1)
#
# Call:
# lm(formula = Petal.Area ~ Sepal.Length + Sepal.Width, data = iris)
#
# Residuals:
# Min 1Q Median 3Q Max
# -4.416 -1.491 -0.298 1.178 7.535
#
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) -15.681 1.980 -7.92 5.4e-13 ***
# Sepal.Length 4.751 0.226 20.99 < 2e-16 ***
# Sepal.Width -2.057 0.430 -4.78 4.1e-06 ***
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#
# Residual standard error: 2.27 on 147 degrees of freedom
# Multiple R-squared: 0.771, Adjusted R-squared: 0.768
# F-statistic: 247 on 2 and 147 DF, p-value: <2e-16
round(model1$coefficients, 2)
# (Intercept) Sepal.Length Sepal.Width
# -15.68 4.75 -2.06
Petal.Area = 4.75 \(\times\) Sepal.Length + -2.06 \(\times\) Sepal.Width + -15.68
For sepal length of 5.1 and sepal width of 3.5, predicted (fitted) petal area:
4.75 \(\times\) 5.1 + -2.06 \(\times\) 3.5 + -15.68 = 1.35
iris$FittedPA <- fitted(model1)
head(iris, 1)
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species Petal.Area FittedPA
# 1 5.1 3.5 1.4 0.2 setosa 0.28 1.3515
anova(model0, model1) # model comparison
# Analysis of Variance Table
#
# Model 1: Petal.Area ~ Sepal.Length
# Model 2: Petal.Area ~ Sepal.Length + Sepal.Width
# Res.Df RSS Df Sum of Sq F Pr(>F)
# 1 148 877
# 2 147 759 1 118 22.9 4.1e-06 ***
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC(model0) - AIC(model1) # AIC decrease >= 2: more complex model should be used
# [1] 19.703
summary(model2 <- lm(Petal.Area ~ Sepal.Length + Sepal.Width + Sepal.Length:Sepal.Width,
data = iris)) # Shorthand: Sepal.Length * Sepal.Width
#
# Call:
# lm(formula = Petal.Area ~ Sepal.Length + Sepal.Width + Sepal.Length:Sepal.Width,
# data = iris)
#
# Residuals:
# Min 1Q Median 3Q Max
# -4.713 -1.379 -0.223 1.176 7.240
#
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 14.047 10.708 1.31 0.19167
# Sepal.Length -0.488 1.869 -0.26 0.79438
# Sepal.Width -11.596 3.405 -3.41 0.00085 ***
# Sepal.Length:Sepal.Width 1.686 0.597 2.82 0.00543 **
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#
# Residual standard error: 2.22 on 146 degrees of freedom
# Multiple R-squared: 0.783, Adjusted R-squared: 0.778
# F-statistic: 175 on 3 and 146 DF, p-value: <2e-16
library(rockchalk)
plotSlopes(model2, Sepal.Length, modx = Sepal.Width)
anova(model1, model2) # model comparison
# Analysis of Variance Table
#
# Model 1: Petal.Area ~ Sepal.Length + Sepal.Width
# Model 2: Petal.Area ~ Sepal.Length + Sepal.Width + Sepal.Length:Sepal.Width
# Res.Df RSS Df Sum of Sq F Pr(>F)
# 1 147 759
# 2 146 720 1 39.3 7.97 0.0054 **
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC(model1) - AIC(model2) # AIC decrease >= 2: more complex model should be used
# [1] 5.9696
library(car)
Anova(aov(Petal.Area ~ Sepal.Length * Sepal.Width, data = iris), type = 3) # identical to: Anova(model2, type=3)
# Anova Table (Type III tests)
#
# Response: Petal.Area
# Sum Sq Df F value Pr(>F)
# (Intercept) 8 1 1.72 0.19167
# Sepal.Length 0 1 0.07 0.79438
# Sepal.Width 57 1 11.59 0.00085 ***
# Sepal.Length:Sepal.Width 39 1 7.97 0.00543 **
# Residuals 720 146
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model2)$coef
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 14.04668 10.70844 1.3117 0.19166689
# Sepal.Length -0.48811 1.86945 -0.2611 0.79438330
# Sepal.Width -11.59565 3.40542 -3.4051 0.00085441
# Sepal.Length:Sepal.Width 1.68611 0.59737 2.8226 0.00542907
# (Intercept) Sepal.Length Sepal.Width Sepal.Length:Sepal.Width
# [1,] 14.05 -0.49 -11.6 1.69
iris$FittedPA <- fitted(model2)
head(iris, 1)
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species Petal.Area FittedPA
# 1 5.1 3.5 1.4 0.2 setosa 0.28 1.0696
iris$Sepal.Length.c <- iris$Sepal.Length - mean(iris$Sepal.Length)
iris$Sepal.Width.c <- iris$Sepal.Width - mean(iris$Sepal.Width)
model2b <- lm(Petal.Area ~ Sepal.Length.c + Sepal.Width.c + Sepal.Length.c:Sepal.Width.c,
data = iris)
round(summary(model2b)$coefficients, 2) # now everything is significant
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 5.87 0.18 32.05 0.00
# Sepal.Length.c 4.67 0.22 20.91 0.00
# Sepal.Width.c -1.74 0.43 -4.01 0.00
# Sepal.Length.c:Sepal.Width.c 1.69 0.60 2.82 0.01
round(summary(model2)$coefficients, 2)
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 14.05 10.71 1.31 0.19
# Sepal.Length -0.49 1.87 -0.26 0.79
# Sepal.Width -11.60 3.41 -3.41 0.00
# Sepal.Length:Sepal.Width 1.69 0.60 2.82 0.01
iris$FittedPA <- fitted(model2)
iris$FittedPA.c <- fitted(model2b)
head(iris[, c("Sepal.Length", "Sepal.Width", "FittedPA", "FittedPA.c")], 1)
# Sepal.Length Sepal.Width FittedPA FittedPA.c
# 1 5.1 3.5 1.0696 1.0696
levels(iris$Species) # factor levels of Species variable (by default ordered alphabetically)
# [1] "setosa" "versicolor" "virginica"
contrasts(iris$Species) # show internal dummy coding
# versicolor virginica
# setosa 0 0
# versicolor 1 0
# virginica 0 1
iris$Species <- relevel(iris$Species, "versicolor") # change reference level
contrasts(iris$Species) # dummy coding)
# setosa virginica
# versicolor 0 0
# setosa 1 0
# virginica 0 1
model <- lm(Petal.Area ~ Species, data = iris)
summary(model)
#
# Call:
# lm(formula = Petal.Area ~ Species, data = iris)
#
# Residuals:
# Min 1Q Median 3Q Max
# -3.796 -0.520 -0.066 0.671 4.574
#
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 5.720 0.209 27.4 <2e-16 ***
# Speciessetosa -5.355 0.296 -18.1 <2e-16 ***
# Speciesvirginica 5.576 0.296 18.9 <2e-16 ***
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#
# Residual standard error: 1.48 on 147 degrees of freedom
# Multiple R-squared: 0.903, Adjusted R-squared: 0.902
# F-statistic: 683 on 2 and 147 DF, p-value: <2e-16
iris$SpeciesSetosa <- (iris$Species == "setosa") * 1
iris$SpeciesVirginica <- (iris$Species == "virginica") * 1
summary(lm(Petal.Area ~ SpeciesSetosa + SpeciesVirginica, data = iris))
#
# Call:
# lm(formula = Petal.Area ~ SpeciesSetosa + SpeciesVirginica, data = iris)
#
# Residuals:
# Min 1Q Median 3Q Max
# -3.796 -0.520 -0.066 0.671 4.574
#
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 5.720 0.209 27.4 <2e-16 ***
# SpeciesSetosa -5.355 0.296 -18.1 <2e-16 ***
# SpeciesVirginica 5.576 0.296 18.9 <2e-16 ***
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#
# Residual standard error: 1.48 on 147 degrees of freedom
# Multiple R-squared: 0.903, Adjusted R-squared: 0.902
# F-statistic: 683 on 2 and 147 DF, p-value: <2e-16
library(multcomp)
summary(glht(model, linfct = mcp(Species = "Tukey")))
#
# Simultaneous Tests for General Linear Hypotheses
#
# Multiple Comparisons of Means: Tukey Contrasts
#
#
# Fit: lm(formula = Petal.Area ~ Species, data = iris)
#
# Linear Hypotheses:
# Estimate Std. Error t value Pr(>|t|)
# setosa - versicolor == 0 -5.355 0.296 -18.1 <2e-16 ***
# virginica - versicolor == 0 5.576 0.296 18.9 <2e-16 ***
# virginica - setosa == 0 10.931 0.296 37.0 <2e-16 ***
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# (Adjusted p values reported -- single-step method)
# (Intercept) Speciessetosa Speciesvirginica
# 5.72 -5.35 5.58
iris$FittedPA <- round(fitted(model),2); unique(iris[,c("Species","FittedPA")])
# Species FittedPA
# 1 setosa 0.37
# 51 versicolor 5.72
# 101 virginica 11.30
model3 <- lm(Petal.Area ~ Sepal.Length + Sepal.Width + Species, data = iris)
library(mosaic)
msummary(model3) # more concise summary
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) -5.056 1.146 -4.41 2.0e-05 ***
# Sepal.Length 1.399 0.221 6.34 2.7e-09 ***
# Sepal.Width 0.892 0.334 2.67 0.0085 **
# Speciessetosa -4.641 0.439 -10.57 < 2e-16 ***
# Speciesvirginica 4.482 0.264 17.00 < 2e-16 ***
#
# Residual standard error: 1.17 on 145 degrees of freedom
# Multiple R-squared: 0.94, Adjusted R-squared: 0.939
# F-statistic: 571 on 4 and 145 DF, p-value: <2e-16
confint(model3) # 95% confidence intervals
# 2.5 % 97.5 %
# (Intercept) -7.32117 -2.7901
# Sepal.Length 0.96294 1.8351
# Sepal.Width 0.23141 1.5531
# Speciessetosa -5.50889 -3.7728
# Speciesvirginica 3.96071 5.0025
par(mfrow = c(1, 2))
crPlot(model3, var = "Sepal.Length") # from library(car)
crPlot(model3, var = "Sepal.Width") # suggests non-linear pattern (see lecture 5)
plot(model3, which = 1)
ncvTest(model3) # from library(car)
# Non-constant Variance Score Test
# Variance formula: ~ fitted.values
# Chisquare = 29.075 Df = 1 p = 6.9628e-08
library(MASS)
boxcox(model3) # shows optimal transformation at around 0.3
altmodel3 <- lm(Petal.Area^0.3 ~ Sepal.Length + Sepal.Width + Species, data = iris)
msummary(altmodel3)
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 0.8400 0.0899 9.35 < 2e-16 ***
# Sepal.Length 0.1005 0.0173 5.81 3.8e-08 ***
# Sepal.Width 0.0867 0.0262 3.31 0.0012 **
# Speciessetosa -0.9174 0.0344 -26.64 < 2e-16 ***
# Speciesvirginica 0.3015 0.0207 14.59 < 2e-16 ***
#
# Residual standard error: 0.0915 on 145 degrees of freedom
# Multiple R-squared: 0.975, Adjusted R-squared: 0.975
# F-statistic: 1.44e+03 on 4 and 145 DF, p-value: <2e-16
ncvTest(altmodel3)
# Non-constant Variance Score Test
# Variance formula: ~ fitted.values
# Chisquare = 2.1766 Df = 1 p = 0.14013
ncvTest(altmodel3, ~Sepal.Length)
# Non-constant Variance Score Test
# Variance formula: ~ Sepal.Length
# Chisquare = 1.4761 Df = 1 p = 0.22439
ncvTest(altmodel3, ~Sepal.Width)
# Non-constant Variance Score Test
# Variance formula: ~ Sepal.Width
# Chisquare = 2.1062 Df = 1 p = 0.1467
ncvTest(altmodel3, ~Species)
# Non-constant Variance Score Test
# Variance formula: ~ Species
# Chisquare = 2.3906 Df = 2 p = 0.30262
car::vif(altmodel3) # Preferably VIF < 5
# GVIF Df GVIF^(1/(2*Df))
# Sepal.Length 3.6484 1 1.9101
# Sepal.Width 2.3215 1 1.5237
# Species 6.0044 2 1.5654
cor(iris$Sepal.Length, iris$Sepal.Width) # Not caused by this pair
# [1] -0.11757
msummary(lm(Sepal.Width ~ Species, data = iris))
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 2.7700 0.0480 57.66 <2e-16 ***
# Speciessetosa 0.6580 0.0679 9.69 <2e-16 ***
# Speciesvirginica 0.2040 0.0679 3.00 0.0031 **
#
# Residual standard error: 0.34 on 147 degrees of freedom
# Multiple R-squared: 0.401, Adjusted R-squared: 0.393
# F-statistic: 49.2 on 2 and 147 DF, p-value: <2e-16
msummary(lm(Sepal.Length ~ Species, data = iris)) # Highest R^2: potentially problematic
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 5.9360 0.0728 81.54 < 2e-16 ***
# Speciessetosa -0.9300 0.1030 -9.03 8.8e-16 ***
# Speciesvirginica 0.6520 0.1030 6.33 2.8e-09 ***
#
# Residual standard error: 0.515 on 147 degrees of freedom
# Multiple R-squared: 0.619, Adjusted R-squared: 0.614
# F-statistic: 119 on 2 and 147 DF, p-value: <2e-16
# Exclude length (best choice)
AIC(m3 <- lm(Petal.Area^0.3 ~ Sepal.Width + Species, data = iris))
# [1] -255.34
# Or exclude species (worst choice)
AIC(m3b <- lm(Petal.Area^0.3 ~ Sepal.Width + Sepal.Length, data = iris))
# [1] 14.039
car::vif(m3) # OK
# GVIF Df GVIF^(1/(2*Df))
# Sepal.Width 1.6688 1 1.2918
# Species 1.6688 2 1.1366
summary(m3)
#
# Call:
# lm(formula = Petal.Area^0.3 ~ Sepal.Width + Species, data = iris)
#
# Residuals:
# Min 1Q Median 3Q Max
# -0.26966 -0.06716 0.00023 0.05874 0.29275
#
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 1.2130 0.0696 17.43 < 2e-16 ***
# Sepal.Width 0.1675 0.0246 6.81 2.4e-10 ***
# Speciessetosa -1.0640 0.0259 -41.04 < 2e-16 ***
# Speciesvirginica 0.3506 0.0209 16.80 < 2e-16 ***
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#
# Residual standard error: 0.101 on 146 degrees of freedom
# Multiple R-squared: 0.97, Adjusted R-squared: 0.969
# F-statistic: 1.56e+03 on 3 and 146 DF, p-value: <2e-16
durbinWatsonTest(m3) # from library(car)
# lag Autocorrelation D-W Statistic p-value
# 1 -0.016722 2.0218 0.99
# Alternative hypothesis: rho != 0
acf(resid(m3)) # autocorrelation only expected for time-series data
shapiro.test(resid(m3))$p.value
# [1] 0.65129
plot(m3, which = 2)
complexmodel <- lm(Petal.Area^0.3 ~ Sepal.Width * Species, data = iris)
bestmodel <- step(complexmodel)
msummary(bestmodel)
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 0.9498 0.1236 7.68 2.2e-12 ***
# Sepal.Width 0.2625 0.0444 5.92 2.3e-08 ***
# Speciessetosa -0.4505 0.1770 -2.55 0.0120 *
# Speciesvirginica 0.4616 0.1788 2.58 0.0108 *
# Sepal.Width:Speciessetosa -0.1972 0.0576 -3.42 0.0008 ***
# Sepal.Width:Speciesvirginica -0.0438 0.0619 -0.71 0.4799
#
# Residual standard error: 0.0974 on 144 degrees of freedom
# Multiple R-squared: 0.972, Adjusted R-squared: 0.971
# F-statistic: 1.01e+03 on 5 and 144 DF, p-value: <2e-16
plotSlopes(bestmodel, Sepal.Width, modx = Species)
# (Intercept) Sepal.Width Speciessetosa Speciesvirginica Sepal.Width:Speciessetosa
# [1,] 0.95 0.26 -0.45 0.46 -0.2
# Sepal.Width:Speciesvirginica
# [1,] -0.04
head(iris[, c("Sepal.Length", "Sepal.Width", "Species", "FittedPA")], 1)
# Sepal.Length Sepal.Width Species FittedPA
# 1 5.1 3.5 setosa 0.72783
similarmodel <- lm(Petal.Area^0.3 ~ Species + Sepal.Width:Species, data = iris)
msummary(similarmodel)
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 0.9498 0.1236 7.68 2.2e-12 ***
# Speciessetosa -0.4505 0.1770 -2.55 0.012 *
# Speciesvirginica 0.4616 0.1788 2.58 0.011 *
# Speciesversicolor:Sepal.Width 0.2625 0.0444 5.92 2.3e-08 ***
# Speciessetosa:Sepal.Width 0.0653 0.0367 1.78 0.077 .
# Speciesvirginica:Sepal.Width 0.2187 0.0432 5.07 1.2e-06 ***
#
# Residual standard error: 0.0974 on 144 degrees of freedom
# Multiple R-squared: 0.972, Adjusted R-squared: 0.971
# F-statistic: 1.01e+03 on 5 and 144 DF, p-value: <2e-16
AIC(similarmodel) - AIC(bestmodel)
# [1] 0
plot(bestmodel, which = 2) # some outliers in residuals
iris2 <- iris[abs(scale(resid(bestmodel))) < 2, ] # exclude outliers
(noutliers <- sum(abs(scale(resid(bestmodel))) >= 2)) # 7 outliers
# [1] 7
bestmodel2 <- lm(Petal.Area^0.3 ~ Sepal.Width * Species, data = iris2)
msummary(bestmodel2) # only small changes in p-values: same pattern as before
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 0.7921 0.1096 7.23 3.1e-11 ***
# Sepal.Width 0.3139 0.0391 8.04 3.8e-13 ***
# Speciessetosa -0.3735 0.1552 -2.41 0.01745 *
# Speciesvirginica 0.5412 0.1556 3.48 0.00068 ***
# Sepal.Width:Speciessetosa -0.2268 0.0505 -4.49 1.5e-05 ***
# Sepal.Width:Speciesvirginica -0.0712 0.0537 -1.33 0.18710
#
# Residual standard error: 0.082 on 137 degrees of freedom
# Multiple R-squared: 0.98, Adjusted R-squared: 0.98
# F-statistic: 1.37e+03 on 5 and 137 DF, p-value: <2e-16
par(mfrow = c(1, 2))
plot(bestmodel, which = 2, main = "original model")
plot(bestmodel2, which = 2, main = "trimmed model")
# Format of influence plot:
# x-axis: hat values (leverage) - amount of influence a predictor has on fitted values
# y-axis: standardized residuals
# circle size: Cook's distance - effect of removing observation on coefficients & fitted values
outliers <- influencePlot(bestmodel) # from library(car)
library(rms)
bm.val <- ols(Petal.Area^0.3 ~ Sepal.Width * Species, data = iris, x = TRUE, y = TRUE) # specify model as ols
validate(bm.val, B = 1000)
# index.orig training test optimism index.corrected n
# R-square 0.9724 0.9732 0.9712 0.0020 0.9703 1000
# MSE 0.0091 0.0088 0.0095 -0.0008 0.0099 1000
# g 0.6208 0.6172 0.6206 -0.0034 0.6241 1000
# Intercept 0.0000 0.0000 0.0017 -0.0017 0.0017 1000
# Slope 1.0000 1.0000 0.9987 0.0013 0.9987 1000
bestmodel.boot <- Boot(bestmodel, R = 1000) # from library(car)
summary(bestmodel.boot)
# R original bootBias bootSE bootMed
# (Intercept) 1000 0.9498 -0.004888 0.1335 0.9468
# Sepal.Width 1000 0.2625 0.001803 0.0456 0.2641
# Speciessetosa 1000 -0.4505 0.001771 0.1967 -0.4409
# Speciesvirginica 1000 0.4616 0.006549 0.1854 0.4639
# Sepal.Width:Speciessetosa 1000 -0.1972 -0.000931 0.0625 -0.1985
# Sepal.Width:Speciesvirginica 1000 -0.0438 -0.002436 0.0616 -0.0454
confint(bestmodel.boot, type = "norm")
# Bootstrap quantiles, type = normal
#
# 2.5 % 97.5 %
# (Intercept) 0.693062 1.216255
# Sepal.Width 0.171389 0.350029
# Speciessetosa -0.837843 -0.066748
# Speciesvirginica 0.091734 0.818301
# Sepal.Width:Speciessetosa -0.318811 -0.073731
# Sepal.Width:Speciesvirginica -0.162137 0.079332
library(lmSupport)
modelEffectSizes(bestmodel)
# lm(formula = Petal.Area^0.3 ~ Sepal.Width * Species, data = iris)
#
# Coefficients
# SSR df pEta-sqr dR-sqr
# (Intercept) 0.5602 1 0.2907 NA
# Sepal.Width 0.3325 1 0.1956 0.0067
# Species 0.2415 2 0.1501 0.0049
# Sepal.Width:Species 0.1304 2 0.0871 0.0026
#
# Sum of squared errors (SSE): 1.4
# Sum of squared total (SST): 49.5
glm
with family="binomial"
or using lrm
R
: plogis(x)
m <- glm(SpeciesVirginica ~ Petal.Area, data = iris, family = "binomial") # dependent variable equals 0 or 1
msummary(m)
# Coefficients:
# Estimate Std. Error z value Pr(>|z|)
# (Intercept) -19.492 5.241 -3.72 0.00020 ***
# Petal.Area 2.440 0.665 3.67 0.00024 ***
#
# (Dispersion parameter for binomial family taken to be 1)
#
# Null deviance: 190.954 on 149 degrees of freedom
# Residual deviance: 22.333 on 148 degrees of freedom
# AIC: 26.33
#
# Number of Fisher Scoring iterations: 9
(malt <- lrm(SpeciesVirginica ~ Petal.Area, data = iris)) # from library(rms)
#
# Logistic Regression Model
#
# lrm(formula = SpeciesVirginica ~ Petal.Area, data = iris)
# Model Likelihood Discrimination Rank Discrim.
# Ratio Test Indexes Indexes
# Obs 150 LR chi2 168.62 R2 0.938 C 0.997
# 0 100 d.f. 1 g 13.075 Dxy 0.994
# 1 50 Pr(> chi2) <0.0001 gr 476765.314 gamma 0.995
# max |deriv| 7e-08 gp 0.444 tau-a 0.445
# Brier 0.025
#
# Coef S.E. Wald Z Pr(>|Z|)
# Intercept -19.4916 5.2409 -3.72 0.0002
# Petal.Area 2.4401 0.6648 3.67 0.0002
# chance for an iris with a
# petal area of 9 to be a
# Virginica iris
logit = coef(m)["(Intercept)"] +
9 * coef(m)["Petal.Area"]
logit
# [1] 2.4692
plogis(logit)
# [1] 0.92195
# chance for an iris with a
# petal area of 7 to be a
# Virginica iris
logit = coef(m)["(Intercept)"] +
7 * coef(m)["Petal.Area"]
logit
# [1] -2.411
plogis(logit)
# [1] 0.08234
crPlot
from library(car)
can be used to assess thisvif
from library(car)
can be used to assess thisvalidate()
from the rms
package applied to the model fit with lrm()
influencePlot()
can be used together with the glm
result glm(cbind(ncorrect,nincorrect) ~ ...)
as opposed toglm(correct ~ ...)
glm()
using family='poisson'
library(swirl); install_from_swirl('Regression_Models')
Thank you for your attention!