-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel.r.orig
163 lines (129 loc) · 6.1 KB
/
model.r.orig
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
########################
## Modeling code file ##
########################
## load packages
library(lme4)
## load data
load('all_data')
all_data$desert <- as.numeric(all_data$desert)
model_data <- subset(all_data, TOTAL.POPULATION > 0)
######################
## Complete pooling ##
######################
cp <- glm(desert ~ CTA_counts + vacant_counts, family = 'binomial', data = model_data)
summary(cp)
################
## No pooling ##
################
np <- glm(desert ~ CTA_counts + vacant_counts + Neighborhood, family = 'binomial', data = model_data)
summary(np)
#####################
## Partial pooling ##
#####################
pp <- glmer(desert ~ CTA_counts + vacant_counts + (1 | Neighborhood), data = model_data, family = 'binomial')
summary(pp)
##################
## Hierarchical ##
##################
## mlm <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + (1 | Neighborhood),
## data = model_data,
## family = 'binomial')
## summary(mlm)
## mlm_2 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + (1 | Neighborhood),
## data = model_data,
## family = 'binomial')
## summary(mlm_2)
## rescale variables
exclude <- c('Neighborhood', 'TRACT_BLOC','STATEFP10', 'COUNTYFP10', 'TRACTCE10', 'BLOCKCE10', 'GEOID10', 'NAME10', 'Longitude', 'Latitude', 'Community.Area.y', 'nearest_supermarket', 'Community.Area.x', 'store_counts', 'desert')
potential_covariates <- setdiff(names(all_data), exclude)
model_data_scale <- model_data
for (var in potential_covariates) {model_data_scale[var] <- as.numeric(scale(model_data[var]))}
## mlm_c <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + (1 | Neighborhood),
## data = model_data_scale,
## family = 'binomial')
## summary(mlm_c)
## mlm_c_2 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + (1 | Neighborhood),
## data = model_data_scale,
## family = 'binomial')
## summary(mlm_c_2)
## mlm_c_3 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + NHB_p + PER.CAPITA.INCOME + (1 | Neighborhood),
## data = model_data_scale,
## family = 'binomial')
## summary(mlm_c_3)
## mlm_c_4 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + NHB_p + PER.CAPITA.INCOME + HISP_p + (1 | Neighborhood),
## data = model_data_scale,
## family = 'binomial')
## summary(mlm_c_4)
## mlm_c_5 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + NHB_p + PER.CAPITA.INCOME + HISP_p + TOTAL.POPULATION + (1 | Neighborhood),
## data = model_data_scale,
## family = 'binomial')
## summary(mlm_c_5)
## mlm_c_6 <- glmer(desert ~ CTA_counts + vacant_counts + Dependency + NHB_p + TOTAL.POPULATION + (1 | Neighborhood),
## data = model_data_scale,
## family = 'binomial')
## summary(mlm_c_6)
## mlm_c_7 <- glmer(desert ~ CTA_counts + vacant_counts + Dependency + HISP_p + TOTAL.POPULATION + (1 | Neighborhood),
## data = model_data_scale,
## family = 'binomial')
## summary(mlm_c_7)
## mlm_c_8 <- glmer(desert ~ CTA_counts + vacant_counts + Dependency + Cancer..All.Sites. + HISP_p + TOTAL.POPULATION + (1 | Neighborhood),
## data = model_data_scale,
## family = 'binomial')
## summary(mlm_c_8)
## mlm_c_9 <- glmer(desert ~ CTA_counts + vacant_counts + Dependency + Cancer..All.Sites. + NHB_p + TOTAL.POPULATION + (1 | Neighborhood),
## data = model_data_scale,
## family = 'binomial')
## summary(mlm_c_9)
## mlm_c_10 <- glmer(desert ~ CTA_counts + vacant_counts + Dependency + Cancer..All.Sites. + Diabetes.related + TOTAL.POPULATION + (1 | Neighborhood),
## data = model_data_scale,
## family = 'binomial')
## summary(mlm_c_10)
cor(model_data[, c('Diabetes.related', 'NHB_p', 'NHW_p', 'HISP_p', 'PER.CAPITA.INCOME')])
search_covariates <- setdiff(potential_covariates, c('vacant_counts', 'CTA_counts', 'Community.Area.Number'))
best_model <- glmer(desert ~ vacant_counts + CTA_counts + (1 | Neighborhood),
data = model_data_scale,
family = 'binomial')
in_vars <- c()
out_vars <- search_covariates
library(parallel)
library(doMC)
registerDoMC(detectCores() - 3)
old_aic <- AIC(best_model)
fit_model <- function(i) {
print(paste('fitting:',i))
form <- as.formula(paste('desert ~ vacant_counts + CTA_counts +',
paste(c(in_vars, out_vars[i]), collapse = '+'),
'+(1|Neighborhood)'))
model <- glmer(form, data = model_data_scale, family = 'binomial')
return(c(i, AIC(model)))
}
while(TRUE) {
search_results <- foreach(i=1:length(out_vars), .combine = 'rbind') %dopar% fit_model(i)
min_aic <- which.min(search_results[,2])
if (min_aic < old_aic) {
print(paste('ADDING:',out_vars[min_aic]))
in_vars <- c(in_vars, out_vars[min_aic])
out_vars <- setdiff(out_vars, out_vars[min_aic])
print(search_results[min_aic,])
}
else {
break
}
}
print(paste('Final model:', in_vars, collapse = ', '))
<<<<<<< HEAD
model <- glmer(desert ~ CTA_counts + vacant_counts + Gonorrhea.in.Females + Cancer..All.Sites. + TOTAL.POPULATION + NHAS +
Dependency +Childhood.Lead.Poisoning + Prenatal.Care.Beginning.in.First.Trimester + Gonorrhea.in.Males
+ NHAM_p + Multiple.Race.. + Stroke..Cerebrovascular.Disease. + Firearm.related + Tuberculosis + NHW_p +
Teen.Birth.Rate + No.High.School.Diploma + Lung.Cancer +
(1|Neighborhood),
data = model_data_scale,
family = 'binomial')
summary(model)
=======
form <- as.formula(paste('desert ~ vacant_counts + CTA_counts +',
paste(in_vars, collapse = '+'),
'+(1|Neighborhood)'))
final_model <- glmer(form, data = model_data_scale, family = 'binomial')
save(final_model, 'final_model')
>>>>>>> fb724787ed67089995bb241b17b3acfa6bf91121