diff --git a/ETL.r b/ETL.r index 9881bf2..4912225 100644 --- a/ETL.r +++ b/ETL.r @@ -170,6 +170,13 @@ socioeconomic <- read.csv('Census_Data_-_Selected_socioeconomic_indicators_in_Ch race <- read.csv('race.csv', stringsAsFactors = FALSE) +## NHW: Non-hispanic white +## NHB: non-hispanic black +## NHAM: american indian/alaskan native, non hispanic +## NHAS: asian, not hispanic +## NHOTHER: other single race, not hispanic + + block_data$Neighborhood <- as.character(block_data$Neighborhood) ## Set up table to match subneighborhoods diff --git a/Plot of Assault..Homicide..png b/Plot of Assault..Homicide..png new file mode 100644 index 0000000..36a9384 Binary files /dev/null and b/Plot of Assault..Homicide..png differ diff --git a/Plot of Below.Poverty.Level.png b/Plot of Below.Poverty.Level.png new file mode 100644 index 0000000..32dc31c Binary files /dev/null and b/Plot of Below.Poverty.Level.png differ diff --git a/Plot of Birth.Rate.png b/Plot of Birth.Rate.png new file mode 100644 index 0000000..8296f96 Binary files /dev/null and b/Plot of Birth.Rate.png differ diff --git a/Plot of Breast.cancer.in.females.png b/Plot of Breast.cancer.in.females.png new file mode 100644 index 0000000..05ddd92 Binary files /dev/null and b/Plot of Breast.cancer.in.females.png differ diff --git a/Plot of CTA_counts.png b/Plot of CTA_counts.png new file mode 100644 index 0000000..6ea060b Binary files /dev/null and b/Plot of CTA_counts.png differ diff --git a/Plot of Cancer..All.Sites..png b/Plot of Cancer..All.Sites..png new file mode 100644 index 0000000..584ad3f Binary files /dev/null and b/Plot of Cancer..All.Sites..png differ diff --git a/Plot of Childhood.Blood.Lead.Level.Screening.png b/Plot of Childhood.Blood.Lead.Level.Screening.png new file mode 100644 index 0000000..6174645 Binary files /dev/null and b/Plot of Childhood.Blood.Lead.Level.Screening.png differ diff --git a/Plot of Childhood.Lead.Poisoning.png b/Plot of Childhood.Lead.Poisoning.png new file mode 100644 index 
0000000..7dc60fb Binary files /dev/null and b/Plot of Childhood.Lead.Poisoning.png differ diff --git a/Plot of Colorectal.Cancer.png b/Plot of Colorectal.Cancer.png new file mode 100644 index 0000000..995ded4 Binary files /dev/null and b/Plot of Colorectal.Cancer.png differ diff --git a/Plot of Community.Area.Number.png b/Plot of Community.Area.Number.png new file mode 100644 index 0000000..94bd3fc Binary files /dev/null and b/Plot of Community.Area.Number.png differ diff --git a/Plot of Community.Area.x.png b/Plot of Community.Area.x.png new file mode 100644 index 0000000..f1d1139 Binary files /dev/null and b/Plot of Community.Area.x.png differ diff --git a/Plot of Crowded.Housing.png b/Plot of Crowded.Housing.png new file mode 100644 index 0000000..9263244 Binary files /dev/null and b/Plot of Crowded.Housing.png differ diff --git a/Plot of Dependency.png b/Plot of Dependency.png new file mode 100644 index 0000000..aaded8b Binary files /dev/null and b/Plot of Dependency.png differ diff --git a/Plot of Diabetes.related.png b/Plot of Diabetes.related.png new file mode 100644 index 0000000..5cf0d0d Binary files /dev/null and b/Plot of Diabetes.related.png differ diff --git a/Plot of Firearm.related.png b/Plot of Firearm.related.png new file mode 100644 index 0000000..cf20deb Binary files /dev/null and b/Plot of Firearm.related.png differ diff --git a/Plot of General.Fertility.Rate.png b/Plot of General.Fertility.Rate.png new file mode 100644 index 0000000..5ba1cce Binary files /dev/null and b/Plot of General.Fertility.Rate.png differ diff --git a/Plot of Gonorrhea.in.Females.png b/Plot of Gonorrhea.in.Females.png new file mode 100644 index 0000000..11e1652 Binary files /dev/null and b/Plot of Gonorrhea.in.Females.png differ diff --git a/Plot of Gonorrhea.in.Males.png b/Plot of Gonorrhea.in.Males.png new file mode 100644 index 0000000..14d4e31 Binary files /dev/null and b/Plot of Gonorrhea.in.Males.png differ diff --git a/Plot of HARDSHIP.INDEX.png b/Plot of 
HARDSHIP.INDEX.png new file mode 100644 index 0000000..60314a2 Binary files /dev/null and b/Plot of HARDSHIP.INDEX.png differ diff --git a/Plot of HISP.png b/Plot of HISP.png new file mode 100644 index 0000000..cb42245 Binary files /dev/null and b/Plot of HISP.png differ diff --git a/Plot of HISP_p.png b/Plot of HISP_p.png new file mode 100644 index 0000000..badf69d Binary files /dev/null and b/Plot of HISP_p.png differ diff --git a/Plot of Infant.Mortality.Rate.png b/Plot of Infant.Mortality.Rate.png new file mode 100644 index 0000000..05d89b4 Binary files /dev/null and b/Plot of Infant.Mortality.Rate.png differ diff --git a/Plot of Low.Birth.Weight.png b/Plot of Low.Birth.Weight.png new file mode 100644 index 0000000..985340e Binary files /dev/null and b/Plot of Low.Birth.Weight.png differ diff --git a/Plot of Lung.Cancer.png b/Plot of Lung.Cancer.png new file mode 100644 index 0000000..12af946 Binary files /dev/null and b/Plot of Lung.Cancer.png differ diff --git a/Plot of Multiple.Race...png b/Plot of Multiple.Race...png new file mode 100644 index 0000000..8a71f3d Binary files /dev/null and b/Plot of Multiple.Race...png differ diff --git a/Plot of Multiple.Race.._p.png b/Plot of Multiple.Race.._p.png new file mode 100644 index 0000000..4ac8bda Binary files /dev/null and b/Plot of Multiple.Race.._p.png differ diff --git a/Plot of NHAM.png b/Plot of NHAM.png new file mode 100644 index 0000000..afa4e8f Binary files /dev/null and b/Plot of NHAM.png differ diff --git a/Plot of NHAM_p.png b/Plot of NHAM_p.png new file mode 100644 index 0000000..ac54b57 Binary files /dev/null and b/Plot of NHAM_p.png differ diff --git a/Plot of NHAS.png b/Plot of NHAS.png new file mode 100644 index 0000000..77ebe2c Binary files /dev/null and b/Plot of NHAS.png differ diff --git a/Plot of NHAS_p.png b/Plot of NHAS_p.png new file mode 100644 index 0000000..fce11c3 Binary files /dev/null and b/Plot of NHAS_p.png differ diff --git a/Plot of NHB.png b/Plot of NHB.png new file mode 100644 
index 0000000..681e844 Binary files /dev/null and b/Plot of NHB.png differ diff --git a/Plot of NHB_p.png b/Plot of NHB_p.png new file mode 100644 index 0000000..2584e9a Binary files /dev/null and b/Plot of NHB_p.png differ diff --git a/Plot of NHOTHER.png b/Plot of NHOTHER.png new file mode 100644 index 0000000..a9e7071 Binary files /dev/null and b/Plot of NHOTHER.png differ diff --git a/Plot of NHOTHER_p.png b/Plot of NHOTHER_p.png new file mode 100644 index 0000000..32a19a4 Binary files /dev/null and b/Plot of NHOTHER_p.png differ diff --git a/Plot of NHW.png b/Plot of NHW.png new file mode 100644 index 0000000..ca72e66 Binary files /dev/null and b/Plot of NHW.png differ diff --git a/Plot of NHW_p.png b/Plot of NHW_p.png new file mode 100644 index 0000000..41bc2e1 Binary files /dev/null and b/Plot of NHW_p.png differ diff --git a/Plot of No.High.School.Diploma.png b/Plot of No.High.School.Diploma.png new file mode 100644 index 0000000..94b3e66 Binary files /dev/null and b/Plot of No.High.School.Diploma.png differ diff --git a/Plot of PER.CAPITA.INCOME.png b/Plot of PER.CAPITA.INCOME.png new file mode 100644 index 0000000..0fad60e Binary files /dev/null and b/Plot of PER.CAPITA.INCOME.png differ diff --git a/Plot of PERCENT.AGED.16..UNEMPLOYED.png b/Plot of PERCENT.AGED.16..UNEMPLOYED.png new file mode 100644 index 0000000..7bdff67 Binary files /dev/null and b/Plot of PERCENT.AGED.16..UNEMPLOYED.png differ diff --git a/Plot of PERCENT.AGED.25..WITHOUT.HIGH.SCHOOL.DIPLOMA.png b/Plot of PERCENT.AGED.25..WITHOUT.HIGH.SCHOOL.DIPLOMA.png new file mode 100644 index 0000000..78b1665 Binary files /dev/null and b/Plot of PERCENT.AGED.25..WITHOUT.HIGH.SCHOOL.DIPLOMA.png differ diff --git a/Plot of PERCENT.AGED.UNDER.18.OR.OVER.64.png b/Plot of PERCENT.AGED.UNDER.18.OR.OVER.64.png new file mode 100644 index 0000000..bff957d Binary files /dev/null and b/Plot of PERCENT.AGED.UNDER.18.OR.OVER.64.png differ diff --git a/Plot of PERCENT.HOUSEHOLDS.BELOW.POVERTY.png b/Plot of 
PERCENT.HOUSEHOLDS.BELOW.POVERTY.png new file mode 100644 index 0000000..414b4c4 Binary files /dev/null and b/Plot of PERCENT.HOUSEHOLDS.BELOW.POVERTY.png differ diff --git a/Plot of PERCENT.OF.HOUSING.CROWDED.png b/Plot of PERCENT.OF.HOUSING.CROWDED.png new file mode 100644 index 0000000..32dc4f5 Binary files /dev/null and b/Plot of PERCENT.OF.HOUSING.CROWDED.png differ diff --git a/Plot of Prenatal.Care.Beginning.in.First.Trimester.png b/Plot of Prenatal.Care.Beginning.in.First.Trimester.png new file mode 100644 index 0000000..40c1258 Binary files /dev/null and b/Plot of Prenatal.Care.Beginning.in.First.Trimester.png differ diff --git a/Plot of Preterm.Births.png b/Plot of Preterm.Births.png new file mode 100644 index 0000000..1ea855e Binary files /dev/null and b/Plot of Preterm.Births.png differ diff --git a/Plot of Prostate.Cancer.in.Males.png b/Plot of Prostate.Cancer.in.Males.png new file mode 100644 index 0000000..bd27883 Binary files /dev/null and b/Plot of Prostate.Cancer.in.Males.png differ diff --git a/Plot of Stroke..Cerebrovascular.Disease..png b/Plot of Stroke..Cerebrovascular.Disease..png new file mode 100644 index 0000000..1259362 Binary files /dev/null and b/Plot of Stroke..Cerebrovascular.Disease..png differ diff --git a/Plot of TOTAL.POPULATION.png b/Plot of TOTAL.POPULATION.png new file mode 100644 index 0000000..3736894 Binary files /dev/null and b/Plot of TOTAL.POPULATION.png differ diff --git a/Plot of TOTAL.png b/Plot of TOTAL.png new file mode 100644 index 0000000..f90794a Binary files /dev/null and b/Plot of TOTAL.png differ diff --git a/Plot of Teen.Birth.Rate.png b/Plot of Teen.Birth.Rate.png new file mode 100644 index 0000000..4c02f00 Binary files /dev/null and b/Plot of Teen.Birth.Rate.png differ diff --git a/Plot of Tuberculosis.png b/Plot of Tuberculosis.png new file mode 100644 index 0000000..9626da8 Binary files /dev/null and b/Plot of Tuberculosis.png differ diff --git a/Plot of Unemployment.png b/Plot of Unemployment.png new file 
mode 100644
index 0000000..edb7dfc
Binary files /dev/null and b/Plot of Unemployment.png differ
diff --git a/Plot of desert.png b/Plot of desert.png
new file mode 100644
index 0000000..7f13dc9
Binary files /dev/null and b/Plot of desert.png differ
diff --git a/Plot of nearest_supermarket.png b/Plot of nearest_supermarket.png
new file mode 100644
index 0000000..d1a172a
Binary files /dev/null and b/Plot of nearest_supermarket.png differ
diff --git a/Plot of store_counts.png b/Plot of store_counts.png
new file mode 100644
index 0000000..014119e
Binary files /dev/null and b/Plot of store_counts.png differ
diff --git a/Plot of vacant_counts.png b/Plot of vacant_counts.png
new file mode 100644
index 0000000..98e5af8
Binary files /dev/null and b/Plot of vacant_counts.png differ
diff --git a/model.r b/model.r
new file mode 100644
index 0000000..db92a42
--- /dev/null
+++ b/model.r
@@ -0,0 +1,102 @@
+########################
+## Modeling code file ##
+########################
+## Fits a sequence of binomial models of `desert`, from complete pooling
+## to multilevel models with a random intercept per Neighborhood.
+
+## load packages
+library(lme4)
+
+## load data
+load('all_data')
+## coerce the response to numeric before fitting
+all_data$desert <- as.numeric(all_data$desert)
+
+## model only rows with a positive TOTAL.POPULATION
+model_data <- subset(all_data, TOTAL.POPULATION > 0)
+
+
+######################
+## Complete pooling ##
+######################
+
+cp <- glm(desert ~ CTA_counts + vacant_counts, family = 'binomial', data = model_data)
+summary(cp)
+
+################
+## No pooling ##
+################
+
+np <- glm(desert ~ CTA_counts + vacant_counts + Neighborhood, family = 'binomial', data = model_data)
+summary(np)
+
+#####################
+## Partial pooling ##
+#####################
+
+pp <- glmer(desert ~ CTA_counts + vacant_counts + (1 | Neighborhood), data = model_data, family = 'binomial')
+summary(pp)
+
+##################
+## Hierarchical ##
+##################
+
+mlm <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + (1 | Neighborhood),
+             data = model_data,
+             family = 'binomial')
+summary(mlm)
+
+mlm_2 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + (1 | Neighborhood),
+               data = model_data,
+               family = 'binomial')
+summary(mlm_2)
+
+
+## rescale variables
+## Center and scale every candidate covariate (all columns of all_data not
+## listed in `exclude`); the mlm_c_* models below are fit to the scaled copy.
+exclude <- c('Neighborhood', 'TRACT_BLOC','STATEFP10', 'COUNTYFP10', 'TRACTCE10', 'BLOCKCE10', 'GEOID10', 'NAME10', 'Longitude', 'Latitude', 'Community.Area.y', 'nearest_supermarket', 'Community.Area.x', 'store_counts', 'desert')
+potential_covariates <- setdiff(names(all_data), exclude)
+model_data_scale <- model_data
+for (var in potential_covariates) {
+  ## scale() needs numeric input; character/factor columns are left unscaled instead of aborting the loop
+  if (is.numeric(model_data[[var]]) || is.logical(model_data[[var]])) {
+    model_data_scale[[var]] <- as.numeric(scale(model_data[[var]]))
+  }
+}
+
+mlm_c <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + (1 | Neighborhood),
+               data = model_data_scale,
+               family = 'binomial')
+summary(mlm_c)
+
+mlm_c_2 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + (1 | Neighborhood),
+                 data = model_data_scale,
+                 family = 'binomial')
+summary(mlm_c_2)
+
+mlm_c_3 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + NHB_p + PER.CAPITA.INCOME + (1 | Neighborhood),
+                 data = model_data_scale,
+                 family = 'binomial')
+summary(mlm_c_3)
+
+mlm_c_4 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + NHB_p + PER.CAPITA.INCOME + HISP_p + (1 | Neighborhood),
+                 data = model_data_scale,
+                 family = 'binomial')
+summary(mlm_c_4)
+
+mlm_c_5 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + NHB_p + PER.CAPITA.INCOME + HISP_p + TOTAL.POPULATION + (1 | Neighborhood),
+                 data = model_data_scale,
+                 family = 'binomial')
+summary(mlm_c_5)
+
+## Same terms as mlm_c_5 plus NHW_p. Stored under its own name so the
+## previous fit is not overwritten and the two models stay comparable.
+mlm_c_6 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + NHW_p + NHB_p + HISP_p + PER.CAPITA.INCOME + TOTAL.POPULATION + (1 | Neighborhood),
+                 data = model_data_scale,
+                 family = 'binomial')
+summary(mlm_c_6)
+
+## Correlations among Diabetes.related and the NHB_p, NHW_p and HISP_p covariates.
+## Rows containing an NA are dropped so NAs do not propagate into the correlations.
+cor(model_data[, c('Diabetes.related', 'NHB_p', 'NHW_p', 'HISP_p')], use = 'complete.obs')
diff --git a/plots.r b/plots.r
index 103f302..03d76fe 100644
--- a/plots.r
+++ b/plots.r
@@ -7,7 +7,7 @@ load('all_data')
 ## Plot location of missing blocks
 library(ggplot2)
 
-pct_missing <- lapply(all_data, function(var) mean(is.na(var)))
+pct_missing <- lapply(all_data, function(var) sum(is.na(var)))  ## now the NA count per column, despite the name
 plot_data <- all_data
 plot_data$missing <- is.na(plot_data$Neighborhood)
 
@@ -57,3 +57,46 @@ t <- project(as.matrix(nbhd_df[,c('long', 'lat')]),'+proj=merc +lat_0=36.6666666
 head(t <- project(as.matrix(nbhd_df[,c('long', 'lat')]),'+proj=merc +lat_0=36.66666666666666+lon_0=-88.33333333333333', inv = TRUE))
 head(t <- project(as.matrix(nbhd_df[,c('long', 'lat')]),'+proj=merc +lat_0=36.66666666666666 +lon_0=-88.33333333333333 +k=0.9999749999999999 +x_0=300000 +y_0=0', inv = TRUE))
 
+
+
+######################
+## Univariate Plots ##
+######################
+## Plots `desert` against one covariate at a time with a binomial glm smoother.
+
+## coerce the response to numeric for the glm fits and smoothers below
+all_data$desert <- as.numeric(all_data$desert)
+
+## single-predictor fit and matching plot for NHB_p
+summary(glm(desert ~ NHB_p, family = 'binomial', data = all_data))
+
+ggplot(all_data, aes(x = NHB_p, y = desert)) +
+  geom_point() + stat_smooth(method = 'glm', method.args = list(family = 'binomial'))
+
+## single-predictor fit and matching plot for vacant_counts
+summary(glm(desert ~ vacant_counts, family = 'binomial', data = all_data))
+
+ggplot(all_data, aes(x = vacant_counts, y = desert)) +
+  geom_point() + stat_smooth(method = 'glm', method.args = list(family = 'binomial'))
+
+## every column of all_data that is not listed in `exclude` gets its own plot
+exclude <- c('Neighborhood', 'TRACT_BLOC','STATEFP10', 'COUNTYFP10', 'TRACTCE10', 'BLOCKCE10', 'GEOID10', 'NAME10', 'Longitude', 'Latitude', 'Community.Area.y', 'nearest_supermarket', 'desert', 'Community.Area.x', 'store_counts')
+potential_covariates <- setdiff(names(all_data), exclude)
+
+## Writes one 'Plot of <covariate>.png' per covariate, using a relative path (current working directory).
+## NOTE(review): aes_string() is deprecated in ggplot2 >= 3.0.0; move to aes(.data[[covar]], ...) once the minimum ggplot2 version is confirmed.
+for (covar in potential_covariates) {
+  print(covar)
+  covar_plot <- ggplot(all_data, aes_string(x = covar, y = 'desert')) +
+    geom_point() + stat_smooth(method = 'glm', method.args = list(family = 'binomial')) +
+    theme_bw()
+  ggsave(filename = paste0('Plot of ', covar, '.png'), plot = covar_plot)
+}
+
+##########
+## MAPS ##
+##########
+
+## Longitude/Latitude scatter coloured by whether desert == 1
+all_data$desert_logical <- all_data$desert == 1
+ggplot(all_data, aes(x = Longitude, y = Latitude, color = desert_logical)) + geom_point(alpha = .1) + theme_bw()