
Martijn Wieling
Department of Information Science
R
codefamily="binomial"
R
: plogis(x)
mgcv
version 1.8.11)geo = bam(NotStd ~ s(Longitude, Latitude), data = tuscan, family = "binomial", discrete = T)
vis.gam(geo, view = c("Longitude", "Latitude"), plot.type = "contour", color = "terrain", too.far = 0.045,
main = "")
summary(geo)$p.table
# Estimate Std. Error z value Pr(>|z|)
# (Intercept) -0.247 0.0033 -75.1 0
plogis(coef(geo)["(Intercept)"])
# (Intercept)
# 0.438
te()
, which can model an \(N\)-way non-linear interaction:te(Longitude, Latitude, ConceptFreq, d=c(2,1))
te(Longitude, Latitude, ConceptFreq, YearBirth, d=c(2,1,1))
system.time(m <- bam(NotStd ~ te(Longitude, Latitude, ConceptFreq.log.z, SpeakerBirthYear.z, d = c(2,
1, 1)) + CommunitySize.log.z + SpeakerJob_Farmer + SpeakerEduLevel.log.z + SpeakerIsMale + s(Speaker,
bs = "re") + s(Location, bs = "re") + s(Concept, bs = "re") + s(Concept, CommunityRecordingYear.z,
bs = "re") + s(Concept, CommunitySize.log.z, bs = "re") + s(Concept, CommunityAvgIncome.log.z, bs = "re") +
s(Concept, CommunityAvgAge.log.z, bs = "re") + s(Concept, SpeakerJob_Farmer, bs = "re") + s(Concept,
SpeakerJob_Executive_AuxiliaryWorker, bs = "re") + s(Concept, SpeakerEduLevel.log.z, bs = "re") +
s(Concept, SpeakerIsMale, bs = "re"), data = tuscan, family = "binomial", discrete = T, nthreads = 4))
# user system elapsed
# 5120 35 1815
system.time(smry <- summary(m))
# user system elapsed
# 5640.78 6.13 5664.91
smry$p.table
# Estimate Std. Error z value Pr(>|z|)
# (Intercept) -0.4372 0.1265 -3.46 5.48e-04
# CommunitySize.log.z -0.0635 0.0225 -2.82 4.80e-03
# SpeakerJob_Farmer 0.0448 0.0168 2.66 7.78e-03
# SpeakerEduLevel.log.z -0.0675 0.0126 -5.37 8.04e-08
# SpeakerIsMale 0.0380 0.0128 2.97 2.98e-03
head(smry$s.table, 1)
# edf Ref.df Chi.sq p-value
# te(SpeakerBirthYear.z,ConceptFreq.log.z,Longitude,Latitude) 225 270 3286 0
# chance for a male farmer in a
# very small village (z-scored
# population size = -2) for
# which the location is unknown
# with a very low education
# level (z-score = -2) to use a
# non-standard lexical form
(logit = coef(m)["(Intercept)"] +
coef(m)["SpeakerIsMale"] +
coef(m)["SpeakerJob_Farmer"] +
-2 * coef(m)["CommunitySize.log.z"] +
-2 * coef(m)["SpeakerEduLevel.log.z"])
# (Intercept)
# -0.0923
plogis(logit) # was: 0.438 (43.8%)
# (Intercept)
# 0.477
tail(smry$s.table, 11)
# edf Ref.df Chi.sq p-value
# s(Speaker) 90.2 2005 97.7 1.45e-02
# s(Location) 175.2 209 5675.4 1.91e-93
# s(Concept) 167.0 168 437792.1 0.00e+00
# s(CommunityRecordingYear.z,Concept) 158.9 170 157471.2 4.46e-184
# s(CommunitySize.log.z,Concept) 149.9 169 29933.0 1.70e-111
# s(CommunityAvgIncome.log.z,Concept) 158.1 170 143338.9 2.12e-160
# s(CommunityAvgAge.log.z,Concept) 154.4 170 110802.6 5.39e-196
# s(SpeakerJob_Farmer,Concept) 85.9 169 26191.6 1.75e-07
# s(SpeakerJob_Executive_AuxiliaryWorker,Concept) 53.3 170 3325.5 6.77e-04
# s(SpeakerEduLevel.log.z,Concept) 139.1 169 9421.6 4.67e-49
# s(SpeakerIsMale,Concept) 85.4 169 111227.6 1.00e-10
s()
to model two-dimensional interactions on the same scalete()
family="binomial"
)Thank you for your attention!