-
Notifications
You must be signed in to change notification settings - Fork 1
/
gam-mapgam-reprex.R
74 lines (66 loc) · 3.37 KB
/
gam-mapgam-reprex.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# modgam example ----------------------------------------------------------
# Load base map in SpatialPolygonsDataFrame format
# This map was read from ESRI shapefiles using the readShapePoly function
data(MAmap)
# Load data and create grid on base map
data(MAdata)
gamgrid <- predgrid(MAdata, map=MAmap) # requires PBSmapping package
# Fit crude logistic GAM to the MA data using span size of 50%
# and predict odds ratios for every point on gamgrid
fit1 <- modgam(data=MAdata, rgrid=gamgrid, m="crude", sp=0.5)
# Summary statistics for pointwise crude odds ratios
summary(fit1$exp.fit)
# Summary stats for pointwise crude log odds (linear predictor)
summary(fit1$fit)
# fit adjusted GAM using span size of 50%,
# including a (too small) conditional permutation test
fit2 <- modgam(data=MAdata, rgrid=gamgrid, permute=25, m="adjusted", sp=0.5)
fit2
# fit adjusted GAM by specifiying formula
fit2.f <- modgam(Case~lo(Xcoord,Ycoord)+Smoking + Mercury + Selenium,data=MAdata,
rgrid=gamgrid, sp=0.5)
fit2.f
# Detailed example with a continuous outcome variable, map projections,
# and data trimming: investigating tweet times by geolocation
# NOTE: this example requires the maps, mapproj, and PBSmapping packages
# Convert base map and beer tweet data locations to US Albers projection
# for better distance estimates than using (lat,long) as (X,Y) coords
if(require(maps) & require(mapproj) & require(PBSmapping)) {
USmap <- map("state",projection="albers",parameters=c(29.5,45.5),
plot=FALSE,fill=TRUE,col="transparent")
data(beertweets)
# Reuse last map projection to convert data coordinates
XY <- mapproject(beertweets$longitude,beertweets$latitude)[1:2]
jtime <- julian(beertweets$time)
# Convert tweet dates and times to time of day (24-hour)
tweettime <- as.numeric(jtime-trunc(jtime))*24
beerproj <- data.frame(tweettime, XY[1], XY[2], beertweets$beer)
# Generate grid on the US map, trimmed to range of beer data
USgrid <- predgrid(beerproj, map=USmap)
# Fit adjusted model--adjusting for beer indicator variable
fit3 <- modgam(data=beerproj, rgrid=USgrid, family=gaussian,
reference="none", m="adjusted", sp=0.05)
# Get summary statistics for predicted tweet times across grid points
summary(fit3$fit)
}
# Smoothing survival rates over geolocations with adjustement of Age and Insurance
# Including generating pointwise standard errors and confidence intervals
data(CAdata)
data(CAgrid)
data = CAdata[1:1000,] # use a subset of the California data
# manually set the categorical variables as factors
data$INS = factor(data$INS)
# no formula needed if data are arranged in a specific order (see \code{data} argument)
fit4 <- modgam(data=data, rgrid=CAgrid, family="survival", sp=0.2)
fit4
# fit the same model using the formula argument, with data columns in any order
fit4.f <- modgam(Surv(time,event)~AGE+factor(INS)+lo(X,Y), data=data,
rgrid = CAgrid, family="survival", sp=0.2)
# Smoothing for two-dimensional chemical exposure instead of geolocation
# case status in 1st column, mercury and selenium in 2nd and 3rd columns
ma2 <- MAdata[,c(1,5:6)]
expgrid <- predgrid(ma2)
fit5 <- modgam(data=ma2,rgrid=expgrid,sp=.5,m="crude")
summary(fit5$exp.fit)
# plot the results, with mercury on the X axis and selenium on the Y axis
plot(fit5, contours="response", arrow=FALSE, axes=TRUE)