Skip to content

Commit

Permalink
Added modeling file and plots
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Berry authored and Daniel Berry committed Nov 17, 2016
1 parent 4c6acc5 commit ad2aa32
Show file tree
Hide file tree
Showing 59 changed files with 134 additions and 1 deletion.
7 changes: 7 additions & 0 deletions ETL.r
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,13 @@ socioeconomic <- read.csv('Census_Data_-_Selected_socioeconomic_indicators_in_Ch

race <- read.csv('race.csv', stringsAsFactors = FALSE)

## NHW: non-Hispanic white
## NHB: non-Hispanic Black
## NHAM: non-Hispanic American Indian / Alaska Native
## NHAS: non-Hispanic Asian
## NHOTHER: non-Hispanic, other single race


block_data$Neighborhood <- as.character(block_data$Neighborhood)

## Set up table to match subneighborhoods
Expand Down
Binary file added Plot of Assault..Homicide..png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of Below.Poverty.Level.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of Birth.Rate.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of Breast.cancer.in.females.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of CTA_counts.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of Cancer..All.Sites..png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of Childhood.Blood.Lead.Level.Screening.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of Childhood.Lead.Poisoning.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of Colorectal.Cancer.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of Community.Area.Number.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of Community.Area.x.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of Crowded.Housing.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of Dependency.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of Diabetes.related.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of Firearm.related.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of General.Fertility.Rate.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of Gonorrhea.in.Females.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of Gonorrhea.in.Males.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of HARDSHIP.INDEX.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of HISP.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of HISP_p.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of Infant.Mortality.Rate.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of Low.Birth.Weight.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of Lung.Cancer.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of Multiple.Race...png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Plot of Multiple.Race.._p.png
Binary file added Plot of NHAM.png
Binary file added Plot of NHAM_p.png
Binary file added Plot of NHAS.png
Binary file added Plot of NHAS_p.png
Binary file added Plot of NHB.png
Binary file added Plot of NHB_p.png
Binary file added Plot of NHOTHER.png
Binary file added Plot of NHOTHER_p.png
Binary file added Plot of NHW.png
Binary file added Plot of NHW_p.png
Binary file added Plot of No.High.School.Diploma.png
Binary file added Plot of PER.CAPITA.INCOME.png
Binary file added Plot of PERCENT.AGED.16..UNEMPLOYED.png
Binary file added Plot of PERCENT.AGED.UNDER.18.OR.OVER.64.png
Binary file added Plot of PERCENT.HOUSEHOLDS.BELOW.POVERTY.png
Binary file added Plot of PERCENT.OF.HOUSING.CROWDED.png
Binary file added Plot of Preterm.Births.png
Binary file added Plot of Prostate.Cancer.in.Males.png
Binary file added Plot of Stroke..Cerebrovascular.Disease..png
Binary file added Plot of TOTAL.POPULATION.png
Binary file added Plot of TOTAL.png
Binary file added Plot of Teen.Birth.Rate.png
Binary file added Plot of Tuberculosis.png
Binary file added Plot of Unemployment.png
Binary file added Plot of desert.png
Binary file added Plot of nearest_supermarket.png
Binary file added Plot of store_counts.png
Binary file added Plot of vacant_counts.png
86 changes: 86 additions & 0 deletions model.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
########################
## Modeling code file ##
########################

## load packages
library(lme4)

## Restore the objects saved in the file 'all_data'; the lines below rely on
## it providing a data frame called `all_data`.
load('all_data')

## Convert the outcome column to numeric before modeling.
all_data[['desert']] <- as.numeric(all_data[['desert']])

## Keep only rows whose TOTAL.POPULATION is greater than zero. which() drops
## rows where the comparison is NA, which are the rows subset() drops too.
model_data <- all_data[which(all_data[['TOTAL.POPULATION']] > 0), , drop = FALSE]


######################
## Complete pooling ##
######################

## A single logistic regression across all rows; Neighborhood is not part of
## the model.
cp <- glm(
  formula = desert ~ CTA_counts + vacant_counts,
  data = model_data,
  family = 'binomial'
)
summary(cp)

################
## No pooling ##
################

## Same predictors, with Neighborhood added as an ordinary model term.
np <- glm(
  formula = desert ~ CTA_counts + vacant_counts + Neighborhood,
  data = model_data,
  family = 'binomial'
)
summary(np)

#####################
## Partial pooling ##
#####################

## Same predictors again, with Neighborhood entering through the
## (1 | Neighborhood) term of a glmer fit instead of as a fixed term.
pp <- glmer(
  formula = desert ~ CTA_counts + vacant_counts + (1 | Neighborhood),
  family = 'binomial',
  data = model_data
)
summary(pp)

##################
## Hierarchical ##
##################

## Partial-pooling model extended with Diabetes.related
## (presumably a neighborhood-level measure -- TODO confirm).
mlm <- glmer(
  formula = desert ~ CTA_counts + vacant_counts + Diabetes.related +
    (1 | Neighborhood),
  family = 'binomial',
  data = model_data
)
summary(mlm)

## As above, additionally including Below.Poverty.Level.
mlm_2 <- glmer(
  formula = desert ~ CTA_counts + vacant_counts + Diabetes.related +
    Below.Poverty.Level + (1 | Neighborhood),
  family = 'binomial',
  data = model_data
)
summary(mlm_2)


## rescale variables
## Columns named in `exclude` are left as they are; every other column of
## all_data is treated as a candidate covariate.
exclude <- c(
  'Neighborhood', 'TRACT_BLOC', 'STATEFP10', 'COUNTYFP10', 'TRACTCE10',
  'BLOCKCE10', 'GEOID10', 'NAME10', 'Longitude', 'Latitude',
  'Community.Area.y', 'nearest_supermarket', 'Community.Area.x',
  'store_counts', 'desert'
)
potential_covariates <- setdiff(names(all_data), exclude)

## Work on a copy so that model_data keeps its original values.
model_data_scale <- model_data
for (var in potential_covariates) {
  ## scale() centers and scales the column (computed on model_data, i.e. after
  ## the population filter); as.numeric() turns its matrix result back into a
  ## plain vector.
  model_data_scale[[var]] <- as.numeric(scale(model_data[[var]]))
}

## Refit the hierarchical models on the rescaled data (model_data_scale),
## adding covariates step by step. Every model keeps the (1 | Neighborhood)
## term and the binomial family.

## Rescaled counterpart of `mlm`.
mlm_c <- glmer(
  desert ~ CTA_counts + vacant_counts + Diabetes.related +
    (1 | Neighborhood),
  data = model_data_scale,
  family = 'binomial'
)
summary(mlm_c)

## + Below.Poverty.Level (rescaled counterpart of `mlm_2`).
mlm_c_2 <- glmer(
  desert ~ CTA_counts + vacant_counts + Diabetes.related +
    Below.Poverty.Level + (1 | Neighborhood),
  data = model_data_scale,
  family = 'binomial'
)
summary(mlm_c_2)

## + NHB_p and PER.CAPITA.INCOME.
mlm_c_3 <- glmer(
  desert ~ CTA_counts + vacant_counts + Diabetes.related +
    Below.Poverty.Level + NHB_p + PER.CAPITA.INCOME + (1 | Neighborhood),
  data = model_data_scale,
  family = 'binomial'
)
summary(mlm_c_3)

## + HISP_p.
mlm_c_4 <- glmer(
  desert ~ CTA_counts + vacant_counts + Diabetes.related +
    Below.Poverty.Level + NHB_p + PER.CAPITA.INCOME + HISP_p +
    (1 | Neighborhood),
  data = model_data_scale,
  family = 'binomial'
)
summary(mlm_c_4)

## + TOTAL.POPULATION.
mlm_c_5 <- glmer(
  desert ~ CTA_counts + vacant_counts + Diabetes.related +
    Below.Poverty.Level + NHB_p + PER.CAPITA.INCOME + HISP_p +
    TOTAL.POPULATION + (1 | Neighborhood),
  data = model_data_scale,
  family = 'binomial'
)
summary(mlm_c_5)

## Same terms as mlm_c_5 plus NHW_p. This fit used to be assigned to mlm_c_5
## as well, which overwrote the model above and made the two impossible to
## compare afterwards; it now has its own name.
mlm_c_6 <- glmer(
  desert ~ CTA_counts + vacant_counts + Diabetes.related +
    Below.Poverty.Level + NHW_p + NHB_p + HISP_p + PER.CAPITA.INCOME +
    TOTAL.POPULATION + (1 | Neighborhood),
  data = model_data_scale,
  family = 'binomial'
)
summary(mlm_c_6)

## Pairwise correlations between Diabetes.related and the race-share columns.
## cor() defaults to use = "everything", where a single NA in a column turns
## every correlation involving it into NA. These columns presumably contain
## NAs for rows without a Neighborhood match (TODO confirm), so each pair is
## computed from the rows where both values are present.
cor(
  model_data[, c('Diabetes.related', 'NHB_p', 'NHW_p', 'HISP_p')],
  use = 'pairwise.complete.obs'
)
42 changes: 41 additions & 1 deletion plots.r
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ load('all_data')
## Plot location of missing blocks
library(ggplot2)

pct_missing <- lapply(all_data, function(var) mean(is.na(var)))
pct_missing <- lapply(all_data, function(var) sum(is.na(var)))

plot_data <- all_data
plot_data$missing <- is.na(plot_data$Neighborhood)
Expand Down Expand Up @@ -57,3 +57,43 @@ t <- project(as.matrix(nbhd_df[,c('long', 'lat')]),'+proj=merc +lat_0=36.6666666
## Pass the 'long'/'lat' columns of nbhd_df (as a matrix) to project() with
## inv = TRUE, store the result in `t`, and show its first rows.
## NOTE(review): `project` is not defined in the visible code -- presumably it
## comes from a projection package loaded earlier; confirm.
## NOTE(review): in this proj string there is no space between the lat_0 value
## and '+lon_0', unlike the call below -- looks like a typo; confirm.
head(t <- project(as.matrix(nbhd_df[,c('long', 'lat')]),'+proj=merc +lat_0=36.66666666666666+lon_0=-88.33333333333333', inv = TRUE))

## Same call with a longer proj string: '+lon_0' is separated by a space and
## +k, +x_0 and +y_0 are added. `t` is overwritten with this result.
head(t <- project(as.matrix(nbhd_df[,c('long', 'lat')]),'+proj=merc +lat_0=36.66666666666666 +lon_0=-88.33333333333333 +k=0.9999749999999999 +x_0=300000 +y_0=0', inv = TRUE))


######################
## Univariate Plots ##
######################

## Convert the outcome column to numeric so it can be used both as the glm
## response and as the y aesthetic below.
all_data[['desert']] <- as.numeric(all_data[['desert']])

## desert against NHB_p: logistic fit summary, then a scatter plot with the
## fitted binomial glm curve drawn by stat_smooth().
summary(glm(formula = desert ~ NHB_p, data = all_data, family = 'binomial'))

ggplot(data = all_data, mapping = aes(x = NHB_p, y = desert)) +
  geom_point() +
  stat_smooth(method = 'glm', method.args = list(family = 'binomial'))

## Same pair of outputs for vacant_counts.
summary(glm(formula = desert ~ vacant_counts, data = all_data, family = 'binomial'))

ggplot(data = all_data, mapping = aes(x = vacant_counts, y = desert)) +
  geom_point() +
  stat_smooth(method = 'glm', method.args = list(family = 'binomial'))

## Columns that are not plotted against the outcome; every other column of
## all_data gets its own plot.
exclude <- c(
  'Neighborhood', 'TRACT_BLOC', 'STATEFP10', 'COUNTYFP10', 'TRACTCE10',
  'BLOCKCE10', 'GEOID10', 'NAME10', 'Longitude', 'Latitude',
  'Community.Area.y', 'nearest_supermarket', 'desert', 'Community.Area.x',
  'store_counts'
)
potential_covariates <- setdiff(names(all_data), exclude)

## One scatter plot of desert against each covariate, with a binomial glm
## smoother, saved as 'Plot of <covariate>.png'.
for (covar in potential_covariates) {
  ## Print the column name so progress is visible while the loop runs.
  print(covar)
  ## aes_string() is used because the column name is held in a string.
  covar_plot <- ggplot(data = all_data, mapping = aes_string(x = covar, y = 'desert')) +
    geom_point() +
    stat_smooth(method = 'glm', method.args = list(family = 'binomial')) +
    theme_bw()
  ggsave(filename = paste0('Plot of ', covar, '.png'), plot = covar_plot)
}

##########
## MAPS ##
##########

## TRUE/FALSE version of the outcome (TRUE where desert equals 1), used for
## the point color.
all_data[['desert_logical']] <- all_data[['desert']] == 1

## One point per row at its Longitude/Latitude; the low alpha keeps
## overlapping points readable.
ggplot(
  data = all_data,
  mapping = aes(x = Longitude, y = Latitude, color = desert_logical)
) +
  geom_point(alpha = .1) +
  theme_bw()

0 comments on commit ad2aa32

Please sign in to comment.