diff --git a/.DS_Store b/.DS_Store index 1d3518b..f95d380 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/.Rhistory b/.Rhistory new file mode 100644 index 0000000..35d6a0e --- /dev/null +++ b/.Rhistory @@ -0,0 +1,474 @@ +?read.csv +?read.table +head(vacant_raw) +str(vacant_raw) +head(vacant_raw$X.7) +str(vacant_raw) +str(vacant_raw) +str(vacant_raw) +install.packages('rgeos') +install.packages('rgeos') +?getwd +head(blocks_raw) +library(sp) +?SpatialPolygonsDataFrame +?Polygon +tmp <- blocks_raw[1,1] +tmp +?%>% +tmp %>% split('(') +tmp %>% substr(16, -3) +tmp %>% substr(16) +tmp %>% substr(16, 20) +tmp %>% str_sub(17, -3) +tmp %>% str_sub(17, -4) +tmp %>% str_sub(17, -4) %>% split(',') +tmp %>% str_sub(17, -4) %>% str_split(',') +tmp %>% str_sub(17, -4) %>% str_split(', ') +tmp %>% str_sub(17, -4) %>% str_split(', ') %>% str_split(' ') +tmp %>% str_sub(17, -4) %>% str_split(', ') +lapply(tmp %>% str_sub(17, -4) %>% str_split(', '), str_split(' ')) +lapply(tmp %>% str_sub(17, -4) %>% str_split(', '), function(s) str_split(s, ' ')) +lapply(tmp %>% str_sub(17, -4) %>% str_split(', '), function(s) as.numeric(str_split(s, ' '))) +unlist(lapply(tmp %>% str_sub(17, -4) %>% str_split(', '), function(s) as.numeric(str_split(s, ' ')))) +unlist(lapply(tmp %>% str_sub(17, -4) %>% str_split(', '), function(s) str_split(s, ' '))) +as.numeric(unlist(lapply(tmp %>% str_sub(17, -4) %>% str_split(', '), function(s) str_split(s, ' ')))) +matrix(as.numeric(unlist(lapply(tmp %>% str_sub(17, -4) %>% str_split(', '), function(s) str_split(s, ' ')))), ncol = 2) +as.numeric(unlist(lapply(tmp %>% str_sub(17, -4) %>% str_split(', '), function(s) str_split(s, ' '))))) +as.numeric(unlist(lapply(tmp %>% str_sub(17, -4) %>% str_split(', '), function(s) str_split(s, ' ')))) +as.numeric(unlist(lapply(tmp %>% str_sub(17, -4) %>% str_split(', '), function(s) str_split(s, ' '))))) +matrix(as.numeric(unlist(lapply(tmp %>% str_sub(17, -4) %>% str_split(', '), function(s) str_split(s, ' ')))), ncol = 2) +matrix(as.numeric(unlist(lapply(tmp %>% str_sub(17, -4) %>% str_split(', '), function(s) str_split(s, ' ')))), ncol = 2, byrow = TRUE) +mp.to.matrix(blocks_raw[1,1]) +mp.to.matrix(blocks_raw[2,1]) +mp.to.matrix(blocks_raw[3,1]) +mp.to.matrix(blocks_raw[4,1]) +tmp_p <- mp.to.matrix(blocks_raw[1,1]) +Polygon(tmp_p) +?gCentroid +class(tmp_p) +gCentroid(tmp_p) +?sp +coordinates(Polygon(tmp_p)) +gCentroid(Polygon(tmp_p)) +?calcCentroid +install.packages('rgdal') +library(rgdal) +?calcCentroid +install.packages('PBSmapping') +?calcCentroid +library(PBSmapping) +install.packages('maptools') +library(maptools) +?calcCentroid +SpatialPolygons(Polygon(tmp_p)) +SpatialPolygons(list(Polygon(tmp_p))) +?slot +SpatialPolygons(list(Polygon(slot(tmp_p, 'Polygons')))) +tmp_p +compute.center(tmp_p) +warnings +warnings() +centers +head(centers) +head(vacant_raw) +nrow(vacant_raw) +nrow(blocks_raw) +head(blocks_raw) +head(blocks_raw$GEOID10) +as.character(head(blocks_raw)) +as.character(head(blocks_raw$GEOID10)) +install.packages('fuzzyjoin') +?geo_join +library(dplyr) +?semi_join +head(vacant_raw) +head(blocks_raw) +head(vacant_raw) +blocks_raw[1:100,] +nrow(blocks_raw[1:100,]) +?distm +str(tmp) +head(blocks_raw[,c('Longitude','Latitude')]) +head(vacant_raw[,c('Longitude','Latitude')]) +is.na(head(vacant_raw[,c('Longitude','Latitude')])) +apply(is.na(head(vacant_raw[,c('Longitude','Latitude')])), 1, any) +nrow(vacant_raw[!apply(is.na(vacant_raw[,c('Longitude', 'Latitude')]), 1, any)]) 
+nrow(vacant_raw[!apply(is.na(vacant_raw[,c('Longitude', 'Latitude')]), 1, any)]) +names(vacant_raw) +sum(apply(is.na(vacant_raw[,c('Longitude','Latitude')]), 1, any)) +nrow(vacant_raw) +str(tmp) +?distHaversine +mean(tmp) +mean(tmp, na.rm = T) +mean(dist_mat, na.rm = T) +head(counts) +mean(counts) +mean(counts, na.rm = T) +ls() +rm("dist_mat") +gc() +quit() +gc() +names(vacant_raw) +vacant_raw[,c('Longitude')] +vacant_raw[,c('Longitude', 'Latitude')] +nrow(vacant_raw) +gc() +t1 +blocks_raw[1:1000, c('Longitude', 'Latitude')] +as.matrix(blocks_raw[1:1000, c('Longitude', 'Latitude')]) +t`1` +t1 +t1 <- as.matrix(blocks_raw[1:10000, c('Longitude', 'Latitude')]) +t2 <- as.matrix(vacant_raw[1:10000, c('Longitude', 'Latitude')]) +system.time(dist_mat <- spDists(t1, t2)) +system.time(dist_mat <- spDists(t1, t2, longlat = TRUE)) +rm(list=ls()) +gc() +names(vacant_raw) +vacant_raw[,c('Longitude', 'Latitude') +] +rm(list=ls()) +gc() +gc() +gc() +vcov(model) +vcov(model) +print(vcov(model)) +?vcov +?vcov.lm +?stats::vcov.lm +cov2cor(vcov(model)) +car::vif(model) +?car::vif +?lm +?lm.fit +View(cov2cor(vcov(model))) +summary(model) +A +B +vcov(model) +cov2cor(vcov(model)) +nrow(vacant_raw) +nrow(blocks_raw) +nrow(vacant_counts) +str(blocks_raw) +head(population) +head(as.character(blocks_raw$GEOID10)) +names(blocks_raw) +names(population) +nrow(blocks_raw) +str(block_data) +str(data.shape) +data.shape +summary(data.shape) +attr(data.shape) +attributes(data.shape) +str(data.shape_df) +?readOGR +class(data.shape) +?SpatialPolygonsDataFrame +names(data.shape) +data.class[,'PRI_NEIGH'] +data.shape[,'PRI_NEIGH'] +data.shape@data$SEC_NEIGH +data.shape@data$PRI_NEIGH +head(data.shape_df) +plot(data.shape) +data.shape@data +data.shape@polygons +data.shape@polygons[[1]] +data.shape@polygons[[1]]@coords +data.shape@polygons[[1]]@polygons +data.shape@polygons[[1]]@polygon +data.shape@polygons[[1]] +data.shape@polygons[[1]][[1]] +data.shape@polygons[[1]]@Polygons +?over +?df +str(block_data) +head(t) +summary(t) +summary(t) +head(t) +nrow(t) +table(t$PRI_NEIGH) +head(block_data) +head(block_data) +rm(list=ls()) +str(vacant_counts) +head(blocks_raw) +head(block_data) +head(block_data) +head(public_health) +mean(block_data$store_counts == 0) +setdiff(public_health$Community.Area.Name, block_data$Neighborhood) +setdiff(block_data$Neighborhood, public_health$Community.Area.Name) +head(socioeconomic) +?spDists +?readOGR +str(dist_mat3) +str(groceries) +mean(groceries$SQUARE.FEET <= 10000) +?spDists +mean(store_counts_new) +mean(store_counts_new <= 1) +mean(block_data$desert) +mean(block_data$store_counts) +mean(block_data$store_counts <= 1) +mean(block_data$store_counts < 1) +mean(block_data$desert) +str(block_data) +sum(block_data[block_data$desert, 'TOTAL.POPULATION']) +sum(block_data[block_data$desert, 'TOTAL.POPULATION'], na.rm = TRUE) +sum(block_data[~block_data$desert, 'TOTAL.POPULATION'], na.rm = TRUE) +sum(block_data[!block_data$desert, 'TOTAL.POPULATION'], na.rm = TRUE) +nrow(t3) +nrow(groceries) +nrow(groceries[groceries$SQUARE.FEET >= 10000,]) +nrow(groceries[groceries$SQUARE.FEET >= 2500,]) +head(buffer) +head(buffer) +lapply(1:2, buffer) +str(buffers) +str(dist_mat3_mi) +mean(store_counts_new) +mean(store_counts_new < 1) +grep('liquor', tolower(groceries$STORE.NAME)) +grep('liquor', tolower(groceries$STORE.NAME), invert = TRUE) +mean(store_counts_new) +mean(store_counts_new < 1) +nrow(groceries) +str(dist_mat3_mi) +str(buffers) +sum(blocks_raw[blocks_raw$store_counts < 1,'TOTAL.POPULATION']) 
+blocks_raw[blocks_raw$store_counts < 1,'TOTAL.POPULATION'] +blocks_raw$store_counts +blocks_raw$store_counts < 1 +str(blocks_raw) +sum(block_data[block_data$store_counts == 0, 'TOTAL.POPULATION']) +sum(block_data[block_data$store_counts == 0, 'TOTAL.POPULATION'], na.rm = T) +cbind(groceries$SQUARE.FEET, buffer) +mean(block_data$desert) +sum(block_raw[block_data$desert,'TOTAL.POPULATION']) +sum(block_data[block_data$desert,'TOTAL.POPULATION']) +sum(block_data[block_data$desert,'TOTAL.POPULATION'], na.rm = T) +str(nearest_supermarket) +?stat_density2d +rm(list=ls()) +mean(block_data$desert) +mean(block_data[block_data$desert, 'TOTAL.POPULATION']) +mean(block_data[block_data$desert, 'TOTAL.POPULATION'], na.rm = T) +sum(block_data[block_data$desert, 'TOTAL.POPULATION'], na.rm = T) +str(race) +str(race) +unique(race$X) +unique(public_health$Community.Area.Name) +unique(block_data$Neighborhood) +setdiff(block_data$Neighborhood, public_health$Community.Area.Name) +t(setdiff(block_data$Neighborhood, public_health$Community.Area.Name)) +t(t(setdiff(block_data$Neighborhood, public_health$Community.Area.Name))) +sum(block_data$Neighborhood == 'Garfield Park') +sum(block_data$Neighborhood == 'Garfield Park', na.rm = T) +total +warnings() +head(as.character(block_data$Neighborhood)) +table(block_data$Neighborhood) +t(t(setdiff(block_data$Neighborhood, public_health$Community.Area.Name))) +t(t(setdiff(block_data$Neighborhood, public_health$Community.Area.Name))) +unique(public_health$Community.Area.Name) +unique(block_data$Neighborhood) +length(public_health$Community.Area.Name[public_health$Community.Area.Name == 'West Garfield Park']) +length(public_health$Community.Area.Name[public_health$Community.Area.Name == 'East Garfield Park']) +public_health$Community.Area.Name[public_health$Community.Area.Name == 'East Garfield Park'] +table(public_health$Community.Area.Name) +public_health$Community.Area.Name[public_health$Community.Area.Name %in% c('East Garfield Park', 'West Garfield Park')] +public_health[,public_health$Community.Area.Name %in% c('East Garfield Park', 'West Garfield Park')] +public_health[public_health$Community.Area.Name %in% c('East Garfield Park', 'West Garfield Park'),] +apply(public_health[public_health$Community.Area.Name %in% c('East Garfield Park', 'West Garfield Park'),], 1, mean) +apply(public_health[public_health$Community.Area.Name %in% c('East Garfield Park', 'West Garfield Park'),], 2, mean) +apply(public_health[public_health$Community.Area.Name %in% c('East Garfield Park', 'West Garfield Park'),], 2, function(s) mean(s, na.rm = TRUE) +) +warnings() +public_health[public_health$Community.Area.Name %in% c('East Garfield Park', 'West Garfield Park'),] +colMeans(public_health[public_health$Community.Area.Name %in% c('East Garfield Park', 'West Garfield Park'),]) +colMeans(public_health[public_health$Community.Area.Name %in% c('East Garfield Park', 'West Garfield Park'),-c('Community.Area', 'Community.Area.Name')]) +colMeans(public_health[public_health$Community.Area.Name %in% c('East Garfield Park', 'West Garfield Park'),- c('Community.Area', 'Community.Area.Name')]) +colMeans(public_health[public_health$Community.Area.Name %in% c('East Garfield Park', 'West Garfield Park'),c('Community.Area', 'Community.Area.Name')]) +colMeans(public_health[public_health$Community.Area.Name %in% c('East Garfield Park', 'West Garfield Park'), !(names(public_health) %in% c('Community.Area', 'Community.Area.Name'))]) +str(public_health) +colMeans(public_health[public_health$Community.Area.Name 
%in% c('East Garfield Park', 'West Garfield Park'), !(names(public_health) %in% c('Community.Area', 'Community.Area.Name'))]) +public_health[public_health$Community.Area.Name %in% c('East Garfield Park', 'West Garfield Park'), !(names(public_health) %in% c('Community.Area', 'Community.Area.Name'))] +str(public_health[public_health$Community.Area.Name %in% c('East Garfield Park', 'West Garfield Park'), !(names(public_health) %in% c('Community.Area', 'Community.Area.Name'))]) +colMeans(public_health[public_health$Community.Area.Name %in% c('East Garfield Park', 'West Garfield Park'), !(names(public_health) %in% c('Community.Area', 'Community.Area.Name'))]) +str(tmp) +str(public_health) +public_health[88,] +public_health[88,] +public_health[88,] +str(socioeconomic) +tmp +tmp +tmp +tmp +str(socioeconomic) +str(public_health) +install.packages('unmarked') +install.packages('AICcmodavg') +library(unmarked) +library(AICcmodavg) +?parboot +unmarked::parboot +unmarked:::parboot +?unmarked::parboot +parboot +getAnywhere('parboot') +getAnywhere('unmarked::parboot') +getAnywhere('parboot') +getMethod() +showMethods('parboot') +getMethod('parboot','unmarkedFit') +quit() +) +length(unique(block_data$Neighborhood)) +length(unique(socioeconomic$COMMUNITY.AREA.NAME)) +setdiff(public_health$Community.Area.Name, socioeconomic$COMMUNITY.AREA.NAME) +length(unique(race$Community.Area)) +length(unique(public_health$Community.Area.Name)) +setdiff(socioeconomic$COMMUNITY.AREA.NAME, public_health$Community.Area.Name) +nrow(socioeconomic) +tail(socioeconomic) +unique(socioeconomic$COMMUNITY.AREA.NAME) +tail(socioeconomic) +tail(socioeconomic) +tail(socioeconomic) +setdiff(socioeconomic$COMMUNITY.AREA.NAME, public_health$Community.Area.Name) +setdiff(socioeconomic$COMMUNITY.AREA.NAME, public_health$Community.Area.Name) +setdiff(public_health$Community.Area.Name,socioeconomic$COMMUNITY.AREA.NAME) +setdiff(race$X, public_health$Community.Area.Name) +names(block_d) +names(block_data) +rm(list=ls()) +str(all_data) +rm(list=ls()) +names(race) +str(race) +to_rep +summary(race$NHW) +sum(is.na(race$NHW)) +sum(is.na(race$NHB)) +sum(is.na(race$NHAM)) +sum(is.na(race$NHAS)) +sum(is.na(race$NHOTHER)) +sum(is.na(race$HISP)) +sum(is.na(race$Multiple.Race..)) +sum(is.na(race$TOTAL)) +sum(is.na(race$Community.Area)) +head(race) +rm(list=ls()) +str(all_data) +mean(complete.cases(all_data)) +?apply +lapply(all_data, function(var) mean(is.na(all_data[,var]))) +pct_missing <- lapply(all_data, function(var) mean(is.na(var))) +pct_missing +all_data[is.na(all_data$NHW),] +table(race$Neighborhood) +names(race) +nrow(race) +race[78] +race[78,] +rm(list=ls()) +pct_missing +str(race) +rm(list=ls()) +quit() +rm(list=ls()) +rm(list=ls()) +str(race) +rm(list=ls()) +str(race) +tmp <- read.csv('race.csv', stringsAsFactors = FALSE) +str(tmp) +rm(list=ls()) +str(race) +str(race) +pct_missing +all_data[is.na(all_data$NHW),] +t <- all_data[is.na(all_data$NHW),] +summary(t) +names(t) +table(t$Neighborhood) +table(t$Neighborhood, useNA = 'ifany') +table(block_data$Neighborhood, useNA = 'ifany') +sum(t$TOTAL.POPULATION) +sum(t$TOTAL.POPULATION, na.rm = T) +install.packages('ggmap') +?get_map +?ggmap +str(data.shape_df) +?fortify +?rgeos::fortify +?rgdal::fortify +quit() +?fortify +summary(data.shape) +quit() +sum(is.na(block_data$Neighborhood)) +mean(is.na(block_data$Neighborhood)) +head(missing) +str(block_data) +sum(block_data[missing, 'TOTAL.POPULATION']) +sum(block_data[missing, 'TOTAL.POPULATION'], na.rm = T) +!missing +str(t1) +missing +miss_ID <- 
20 +head(dist_mat) +str(dist_mat) +min(dist_mat) +which.min(dist_mat) +sum(is.na(block_data$Neighborhood)) +sum(is.na(block_data$Neighborhood)) +pct_missing +) +?geom_smooth +sum(is.na(block_data$NHB_p)) +block_data$NHB_p +str(block_data) +sum(is.na(plot_data$NHB_p)) +str(plot_data) +quit() +str(all_data) +str(all_data) +summary(cp) +summary(np) +anova(np) +summary(pp) +summary(np) +str(all_data) +summary(mlm) +install.packages('glmmLasso') +g +q +install.packages('glmmLasso', repos = 'https://cloud.r-project.org/')) +install.packages('glmmLasso', repos = 'https://cloud.r-project.org/') +nrow(all_data) +nrow(model_data) +summary(pp) +model_data['NHB'] +str(model_data_scale) +t <- scale(model_data['NHB']) +str(t) +t <- scale(model_data) +str(as.numeric(scale(model_data['NHB']))) +potential_covariates +summary(best_model) +?parlapply +install.packages('parallel') +1 +raw +rm(list=ls()) +quit() diff --git a/auto/paper.el b/auto/paper.el new file mode 100644 index 0000000..b2dbc94 --- /dev/null +++ b/auto/paper.el @@ -0,0 +1,9 @@ +(TeX-add-style-hook + "paper" + (lambda () + (TeX-run-style-hooks + "latex2e" + "IEEEtran" + "IEEEtran10")) + :latex) + diff --git a/model.r.orig b/model.r.orig new file mode 100644 index 0000000..8bdc17d --- /dev/null +++ b/model.r.orig @@ -0,0 +1,163 @@ +######################## +## Modeling code file ## +######################## + +## load packages +library(lme4) + +## load data +load('all_data') +all_data$desert <- as.numeric(all_data$desert) + +model_data <- subset(all_data, TOTAL.POPULATION > 0) + + +###################### +## Complete pooling ## +###################### + +cp <- glm(desert ~ CTA_counts + vacant_counts, family = 'binomial', data = model_data) +summary(cp) +################ +## No pooling ## +################ + +np <- glm(desert ~ CTA_counts + vacant_counts + Neighborhood, family = 'binomial', data = model_data) +summary(np) + +##################### +## Partial pooling ## +##################### + +pp <- glmer(desert ~ CTA_counts + vacant_counts + (1 | Neighborhood), data = model_data, family = 'binomial') +summary(pp) + +################## +## Hierarchical ## +################## + +## mlm <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + (1 | Neighborhood), +## data = model_data, +## family = 'binomial') +## summary(mlm) + +## mlm_2 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + (1 | Neighborhood), +## data = model_data, +## family = 'binomial') +## summary(mlm_2) + + +## rescale variables +exclude <- c('Neighborhood', 'TRACT_BLOC','STATEFP10', 'COUNTYFP10', 'TRACTCE10', 'BLOCKCE10', 'GEOID10', 'NAME10', 'Longitude', 'Latitude', 'Community.Area.y', 'nearest_supermarket', 'Community.Area.x', 'store_counts', 'desert') +potential_covariates <- setdiff(names(all_data), exclude) +model_data_scale <- model_data +for (var in potential_covariates) {model_data_scale[var] <- as.numeric(scale(model_data[var]))} + +## mlm_c <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + (1 | Neighborhood), +## data = model_data_scale, +## family = 'binomial') +## summary(mlm_c) + +## mlm_c_2 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + (1 | Neighborhood), +## data = model_data_scale, +## family = 'binomial') +## summary(mlm_c_2) + +## mlm_c_3 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + NHB_p + PER.CAPITA.INCOME + (1 | Neighborhood), +## data = model_data_scale, +## family = 'binomial') +## summary(mlm_c_3) + +## 
mlm_c_4 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + NHB_p + PER.CAPITA.INCOME + HISP_p + (1 | Neighborhood), +## data = model_data_scale, +## family = 'binomial') +## summary(mlm_c_4) + +## mlm_c_5 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + NHB_p + PER.CAPITA.INCOME + HISP_p + TOTAL.POPULATION + (1 | Neighborhood), +## data = model_data_scale, +## family = 'binomial') +## summary(mlm_c_5) + +## mlm_c_6 <- glmer(desert ~ CTA_counts + vacant_counts + Dependency + NHB_p + TOTAL.POPULATION + (1 | Neighborhood), +## data = model_data_scale, +## family = 'binomial') +## summary(mlm_c_6) + +## mlm_c_7 <- glmer(desert ~ CTA_counts + vacant_counts + Dependency + HISP_p + TOTAL.POPULATION + (1 | Neighborhood), +## data = model_data_scale, +## family = 'binomial') +## summary(mlm_c_7) + +## mlm_c_8 <- glmer(desert ~ CTA_counts + vacant_counts + Dependency + Cancer..All.Sites. + HISP_p + TOTAL.POPULATION + (1 | Neighborhood), +## data = model_data_scale, +## family = 'binomial') +## summary(mlm_c_8) + +## mlm_c_9 <- glmer(desert ~ CTA_counts + vacant_counts + Dependency + Cancer..All.Sites. + NHB_p + TOTAL.POPULATION + (1 | Neighborhood), +## data = model_data_scale, +## family = 'binomial') +## summary(mlm_c_9) + +## mlm_c_10 <- glmer(desert ~ CTA_counts + vacant_counts + Dependency + Cancer..All.Sites. + Diabetes.related + TOTAL.POPULATION + (1 | Neighborhood), +## data = model_data_scale, +## family = 'binomial') +## summary(mlm_c_10) + +cor(model_data[, c('Diabetes.related', 'NHB_p', 'NHW_p', 'HISP_p', 'PER.CAPITA.INCOME')]) + +search_covariates <- setdiff(potential_covariates, c('vacant_counts', 'CTA_counts', 'Community.Area.Number')) + +best_model <- glmer(desert ~ vacant_counts + CTA_counts + (1 | Neighborhood), + data = model_data_scale, + family = 'binomial') + +in_vars <- c() +out_vars <- search_covariates + +library(parallel) +library(doMC) + +registerDoMC(detectCores() - 3) + +old_aic <- AIC(best_model) + +fit_model <- function(i) { + print(paste('fitting:',i)) + form <- as.formula(paste('desert ~ vacant_counts + CTA_counts +', + paste(c(in_vars, out_vars[i]), collapse = '+'), + '+(1|Neighborhood)')) + model <- glmer(form, data = model_data_scale, family = 'binomial') + return(c(i, AIC(model))) +} + +while(TRUE) { + search_results <- foreach(i=1:length(out_vars), .combine = 'rbind') %dopar% fit_model(i) + min_aic <- which.min(search_results[,2]) + if (min_aic < old_aic) { + print(paste('ADDING:',out_vars[min_aic])) + in_vars <- c(in_vars, out_vars[min_aic]) + out_vars <- setdiff(out_vars, out_vars[min_aic]) + print(search_results[min_aic,]) + } + else { + break + } +} +print(paste('Final model:', in_vars, collapse = ', ')) + +<<<<<<< HEAD +model <- glmer(desert ~ CTA_counts + vacant_counts + Gonorrhea.in.Females + Cancer..All.Sites. + TOTAL.POPULATION + NHAS + + Dependency +Childhood.Lead.Poisoning + Prenatal.Care.Beginning.in.First.Trimester + Gonorrhea.in.Males + + NHAM_p + Multiple.Race.. + Stroke..Cerebrovascular.Disease. 
+ Firearm.related + Tuberculosis + NHW_p + + Teen.Birth.Rate + No.High.School.Diploma + Lung.Cancer + + (1|Neighborhood), + data = model_data_scale, + family = 'binomial') +summary(model) +======= +form <- as.formula(paste('desert ~ vacant_counts + CTA_counts +', + paste(in_vars, collapse = '+'), + '+(1|Neighborhood)')) +final_model <- glmer(form, data = model_data_scale, family = 'binomial') +save(final_model, 'final_model') +>>>>>>> fb724787ed67089995bb241b17b3acfa6bf91121 diff --git a/paper.aux b/paper.aux new file mode 100644 index 0000000..e4a2ad3 --- /dev/null +++ b/paper.aux @@ -0,0 +1,29 @@ +\relax +\@writefile{toc}{\contentsline {section}{\numberline {I}Introduction}{1}} +\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {I-A}}Food Deserts}{1}} +\@writefile{toc}{\contentsline {section}{\numberline {II}Methods}{1}} +\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {II-A}}Data Gathering and Manipulation}{1}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {II-A}1}Block level data}{1}} +\@writefile{toc}{\contentsline {paragraph}{\numberline {\unhbox \voidb@x \hbox {II-A}1a} Crimes 2001 - present}{1}} +\@writefile{toc}{\contentsline {paragraph}{\numberline {\unhbox \voidb@x \hbox {II-A}1b} 311 Service Requests: Vacant Buildings}{1}} +\@writefile{toc}{\contentsline {paragraph}{\numberline {\unhbox \voidb@x \hbox {II-A}1c} CTA Ridership: Avg. Weekly Boardings during October 2010}{1}} +\@writefile{toc}{\contentsline {paragraph}{\numberline {\unhbox \voidb@x \hbox {II-A}1d} Census Block Population }{1}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {II-A}2}Neighborhood level data}{1}} +\@writefile{toc}{\contentsline {paragraph}{\numberline {\unhbox \voidb@x \hbox {II-A}2a} Public Health Statistics: selected public health indicators by Chicago community area}{1}} +\@writefile{toc}{\contentsline {paragraph}{\numberline {\unhbox \voidb@x \hbox {II-A}2b} Census Data: Selected socioeconomic indicators }{1}} +\@writefile{toc}{\contentsline {paragraph}{\numberline {\unhbox \voidb@x \hbox {II-A}2c} Race by Community Area }{2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {II-B}}Generalized Linear Models}{2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {II-C}}Hierarchical Models}{2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {II-D}}Models Fit}{2}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {II-D}1}Complete Pooling}{2}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {II-D}2}No Pooling}{2}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {II-D}3}Partial pooling}{2}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {II-D}4}Hierarchical}{2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {II-E}}Model Comparison}{2}} +\@writefile{toc}{\contentsline {section}{\numberline {III}Results}{2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {III-A}}Model Parameters}{2}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {III-A}1}No Pooling}{2}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {III-A}2}Complete Pooling}{2}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline 
{\unhbox \voidb@x \hbox {III-A}3}Partial Pooling}{2}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {\unhbox \voidb@x \hbox {III-A}4}Hierarchical}{2}} +\@writefile{toc}{\contentsline {section}{\numberline {IV}Conclusions}{2}} diff --git a/paper.log b/paper.log new file mode 100644 index 0000000..9397e74 --- /dev/null +++ b/paper.log @@ -0,0 +1,220 @@ +This is pdfTeX, Version 3.14159265-2.6-1.40.17 (TeX Live 2016) (preloaded format=pdflatex 2016.5.22) 20 NOV 2016 17:46 +entering extended mode + restricted \write18 enabled. + file:line:error style messages enabled. + %&-line parsing enabled. +**\input paper.tex +(./paper.tex +(/usr/local/texlive/2016/texmf-dist/tex/latex/IEEEtran/IEEEtran.cls +Document Class: IEEEtran 2015/08/26 V1.8b by Michael Shell +-- See the "IEEEtran_HOWTO" manual for usage information. +-- http://www.michaelshell.org/tex/ieeetran/ +\@IEEEtrantmpdimenA=\dimen102 +\@IEEEtrantmpdimenB=\dimen103 +\@IEEEtrantmpdimenC=\dimen104 +\@IEEEtrantmpcountA=\count79 +\@IEEEtrantmpcountB=\count80 +\@IEEEtrantmpcountC=\count81 +\@IEEEtrantmptoksA=\toks14 +LaTeX Font Info: Try loading font information for OT1+ptm on input line 503. + +(/usr/local/texlive/2016/texmf-dist/tex/latex/psnfss/ot1ptm.fd +File: ot1ptm.fd 2001/06/04 font definitions for OT1/ptm. +) +-- Using 8.5in x 11in (letter) paper. +-- Using PDF output. +\@IEEEnormalsizeunitybaselineskip=\dimen105 +-- This is a 10 point document. +\CLASSINFOnormalsizebaselineskip=\dimen106 +\CLASSINFOnormalsizeunitybaselineskip=\dimen107 +\IEEEnormaljot=\dimen108 +LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <5> not available +(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090. +LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <5> not available +(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090. + +LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <7> not available +(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090. +LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <7> not available +(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090. + +LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <8> not available +(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090. +LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <8> not available +(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090. + +LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <9> not available +(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090. +LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <9> not available +(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090. + +LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <10> not available +(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090. +LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <10> not available +(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090. + +LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <11> not available +(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090. +LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <11> not available +(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090. + +LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <12> not available +(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090. +LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <12> not available +(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090. 
+ +LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <17> not available +(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090. +LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <17> not available +(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090. + +LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <20> not available +(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090. +LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <20> not available +(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090. + +LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <24> not available +(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090. +LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <24> not available +(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090. + +\IEEEquantizedlength=\dimen109 +\IEEEquantizedlengthdiff=\dimen110 +\IEEEquantizedtextheightdiff=\dimen111 +\IEEEilabelindentA=\dimen112 +\IEEEilabelindentB=\dimen113 +\IEEEilabelindent=\dimen114 +\IEEEelabelindent=\dimen115 +\IEEEdlabelindent=\dimen116 +\IEEElabelindent=\dimen117 +\IEEEiednormlabelsep=\dimen118 +\IEEEiedmathlabelsep=\dimen119 +\IEEEiedtopsep=\skip41 +\c@section=\count82 +\c@subsection=\count83 +\c@subsubsection=\count84 +\c@paragraph=\count85 +\c@IEEEsubequation=\count86 +\abovecaptionskip=\skip42 +\belowcaptionskip=\skip43 +\c@figure=\count87 +\c@table=\count88 +\@IEEEeqnnumcols=\count89 +\@IEEEeqncolcnt=\count90 +\@IEEEsubeqnnumrollback=\count91 +\@IEEEquantizeheightA=\dimen120 +\@IEEEquantizeheightB=\dimen121 +\@IEEEquantizeheightC=\dimen122 +\@IEEEquantizeprevdepth=\dimen123 +\@IEEEquantizemultiple=\count92 +\@IEEEquantizeboxA=\box26 +\@IEEEtmpitemindent=\dimen124 +\IEEEPARstartletwidth=\dimen125 +\c@IEEEbiography=\count93 +\@IEEEtranrubishbin=\box27 +) (/usr/local/texlive/2016/texmf-dist/tex/latex/amsmath/amsmath.sty +Package: amsmath 2016/03/10 v2.15b AMS math features +\@mathmargin=\skip44 + +For additional information on amsmath, use the `?' option. +(/usr/local/texlive/2016/texmf-dist/tex/latex/amsmath/amstext.sty +Package: amstext 2000/06/29 v2.01 AMS text + +(/usr/local/texlive/2016/texmf-dist/tex/latex/amsmath/amsgen.sty +File: amsgen.sty 1999/11/30 v2.0 generic functions +\@emptytoks=\toks15 +\ex@=\dimen126 +)) +(/usr/local/texlive/2016/texmf-dist/tex/latex/amsmath/amsbsy.sty +Package: amsbsy 1999/11/29 v1.2d Bold Symbols +\pmbraise@=\dimen127 +) +(/usr/local/texlive/2016/texmf-dist/tex/latex/amsmath/amsopn.sty +Package: amsopn 2016/03/08 v2.02 operator names +) +\inf@bad=\count94 +LaTeX Info: Redefining \frac on input line 199. +\uproot@=\count95 +\leftroot@=\count96 +LaTeX Info: Redefining \overline on input line 297. +\classnum@=\count97 +\DOTSCASE@=\count98 +LaTeX Info: Redefining \ldots on input line 394. +LaTeX Info: Redefining \dots on input line 397. +LaTeX Info: Redefining \cdots on input line 518. +\Mathstrutbox@=\box28 +\strutbox@=\box29 +\big@size=\dimen128 +LaTeX Font Info: Redeclaring font encoding OML on input line 634. +LaTeX Font Info: Redeclaring font encoding OMS on input line 635. 
+\macc@depth=\count99 +\c@MaxMatrixCols=\count100 +\dotsspace@=\muskip10 +\c@parentequation=\count101 +\dspbrk@lvl=\count102 +\tag@help=\toks16 +\row@=\count103 +\column@=\count104 +\maxfields@=\count105 +\andhelp@=\toks17 +\eqnshift@=\dimen129 +\alignsep@=\dimen130 +\tagshift@=\dimen131 +\tagwidth@=\dimen132 +\totwidth@=\dimen133 +\lineht@=\dimen134 +\@envbody=\toks18 +\multlinegap=\skip45 +\multlinetaggap=\skip46 +\mathdisplay@stack=\toks19 +LaTeX Info: Redefining \[ on input line 2739. +LaTeX Info: Redefining \] on input line 2740. +) (./paper.aux) +\openout1 = `paper.aux'. + +LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 8. +LaTeX Font Info: ... okay on input line 8. +LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 8. +LaTeX Font Info: ... okay on input line 8. +LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 8. +LaTeX Font Info: ... okay on input line 8. +LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 8. +LaTeX Font Info: ... okay on input line 8. +LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 8. +LaTeX Font Info: ... okay on input line 8. +LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 8. +LaTeX Font Info: ... okay on input line 8. + +-- Lines per column: 58 (exact). +[1{/usr/local/texlive/2016/texmf-var/fonts/map/pdftex/updmap/pdftex.map} + + +] +[2] (./paper.aux) ) +Here is how much of TeX's memory you used: + 1650 strings out of 493014 + 26068 string characters out of 6133351 + 90665 words of memory out of 5000000 + 5247 multiletter control sequences out of 15000+600000 + 34539 words of font info for 65 fonts, out of 8000000 for 9000 + 1141 hyphenation exceptions out of 8191 + 27i,5n,20p,570b,166s stack positions out of 5000i,500n,10000p,200000b,80000s +{/usr/local/texlive/2016/texmf-dist/fonts/enc/dvips/base/8r. +enc} + +Output written on paper.pdf (2 pages, 116127 bytes). +PDF statistics: + 56 PDF objects out of 1000 (max. 8388607) + 40 compressed objects within 1 object stream + 0 named destinations out of 1000 (max. 500000) + 1 words of extra memory for PDF output out of 10000 (max. 10000000) + diff --git a/paper.out b/paper.out new file mode 100644 index 0000000..235c8dd --- /dev/null +++ b/paper.out @@ -0,0 +1,9 @@ +\BOOKMARK [1][-]{section.1}{Introduction}{}% 1 +\BOOKMARK [2][-]{subsection.1.1}{Food Deserts}{section.1}% 2 +\BOOKMARK [1][-]{section.2}{Methods}{}% 3 +\BOOKMARK [2][-]{subsection.2.1}{Data Sources}{section.2}% 4 +\BOOKMARK [2][-]{subsection.2.2}{Generalized Linear Models}{section.2}% 5 +\BOOKMARK [2][-]{subsection.2.3}{Hierarchical Models}{section.2}% 6 +\BOOKMARK [2][-]{subsection.2.4}{Model Comparison}{section.2}% 7 +\BOOKMARK [1][-]{section.3}{Results}{}% 8 +\BOOKMARK [1][-]{section.4}{Conclusions}{}% 9 diff --git a/paper.pdf b/paper.pdf index 03f9114..5572484 100644 Binary files a/paper.pdf and b/paper.pdf differ diff --git a/paper.tex b/paper.tex index fd8b009..a90b7dd 100644 --- a/paper.tex +++ b/paper.tex @@ -1,6 +1,6 @@ \documentclass{IEEEtran} - +\usepackage{amsmath} \title{Access to Food in Chicago: \\ a Hierarchical Perspective} \author{Daniel Berry} @@ -26,35 +26,103 @@ \subsection{Food Deserts} For this work we used the definition from %TODO: citation of defining a city block as being in a food desert if the city block is more than 1 mile from a supermarket. Supermarket in this context is a grocery store that is larger than 10000 square feet %TODO: citation that is not primarily a liquor store. 
Distance is defined as the great circle distance from the center of mass of the city block to the center of mass of the grocery store. + + \section{Methods}
+In the following section we describe the procedures used. We begin by describing the data and their sources, then describe the types of models fit.
-\subsection{Data Sources}
+\subsection{Data Gathering and Manipulation}
 All data was sourced from the generous Open Data Portal operated by the city of Chicago. The portal can be found at data.cityofchicago.org. We utilized the following data files:
-\begin{itemize} -\item Block level data - \begin{itemize} - \item Crimes 2001 - present - \item 311 Service Requests: Vacant Buildings - \item CTA Ridership: Avg. Weekly Boardings during October 2010 - \end{itemize} -\item Neighborhood level data - \begin{itemize} - \item Public Health Statistics: selected public health indicators by Chicago community area - \item Census Data: Selected socioeconomic indicators - \item Census Block Population - \item Race by Community Area - \end{itemize} -\end{itemize}
+ +\subsubsection{Block level data}
+ +\paragraph{ Crimes 2001 - present}
+This file contains a record for each crime in Chicago since 2001, with information about the type of crime as well as its location. The location is pseudo-anonymized: it is randomized, but kept within the same city block. For each city block we counted the total number of crimes committed within 1 mile in 2009. Our hypothesis \textit{a priori} was that food deserts were often located in high-crime areas.
+ +\paragraph{ 311 Service Requests: Vacant Buildings}
+This file contains a record for every 311 service call about an abandoned/unoccupied/unlawfully occupied building. For each city block we counted the number of calls for vacant buildings located within 1 mile of the city block. Our hypothesis was that food deserts were located in areas with higher levels of vacant buildings.
+ +\paragraph{ CTA Ridership: Avg. Weekly Boardings during October 2010}
+This file contains average weekly boardings for the month of October 2010 for every bus stop in Chicago. We counted the total boardings for all stops within a mile of each city block. Our hypothesis was that residents of food deserts would tend to rely more heavily on public transportation than residents of other areas.
+ +\paragraph{ Census Block Population }
+This file contains the population of each city block.
+ +\subsubsection{Neighborhood level data}
+ +\paragraph{ Public Health Statistics: selected public health indicators by Chicago community area}
+This file contains a record for every neighborhood in Chicago with selected public health information. Examples include teenage births per 100,000 residents and gonorrhea prevalence (cases per 100,000). Our hypothesis was that food deserts were likely to be underserved in a broader public health sense, not just in access to food.
+ +\paragraph{ Census Data: Selected socioeconomic indicators }
+This file contains a record for every neighborhood in Chicago with information about the socioeconomic status of that neighborhood. Examples include the percent of residents below the poverty level and the percent without a high school diploma.
+ +\paragraph{ Race by Community Area }
+This file contains a record for every neighborhood in Chicago with the number of residents of each race who reside in that neighborhood.
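A minimal sketch of the "count events within 1 mile of each block" step described above, assuming blocks_raw holds block centroids and a hypothetical crimes_raw holds event coordinates (both with Longitude/Latitude columns, as in the .Rhistory session); the 1.609 km radius and the crime_counts column name are illustrative, not taken from the repository:

## hypothetical sketch; not part of the committed code
library(sp)

## block and event coordinates as matrices of (longitude, latitude)
block_coords <- as.matrix(blocks_raw[, c('Longitude', 'Latitude')])
event_coords <- as.matrix(crimes_raw[, c('Longitude', 'Latitude')])  ## crimes_raw is an assumed name

## great-circle distances in kilometers (longlat = TRUE);
## for the full data this matrix can be very large and may need to be computed in chunks
dist_km <- spDists(block_coords, event_coords, longlat = TRUE)

## count events within 1 mile (about 1.609 km) of each block centroid
blocks_raw$crime_counts <- rowSums(dist_km <= 1.609)

The same pattern applies to the 311 vacant-building calls and the CTA boardings (summing boardings instead of counting rows).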
\subsection{Generalized Linear Models}
+In ordinary linear regression (ordinary least squares, OLS) we assume that our observations $y$ are a linear combination of the covariates $x_i$ plus random error. In matrix notation this can be expressed as $$y = X\beta + \epsilon,$$ where $y$ is the vector of responses, $X$ is the (model) matrix of data, $\beta$ is the vector of regression coefficients, and $\epsilon \sim N(0, \sigma^2 I)$ is the random error.
+ +Generalized linear models extend this to non-normally distributed responses through the use of a link function $g$: $$E[Y] = g^{-1}(X\beta).$$ For binary $\{0,1\}$ or binomially distributed responses there are a variety of link functions available; one of the most common is the logit link, $g(x) = \log(\frac{x}{1-x})$. For simplicity we chose the logit link, although other options are certainly available (probit, $t$, etc.).
+ \subsection{Hierarchical Models}
+Hierarchical models are a further extension of the linear model family: they allow coefficients (here, the intercepts) to vary by group, and allow group-level predictors to inform those varying coefficients.
+ +\subsection{Models Fit}
+ +\subsubsection{Complete Pooling}
+ +To begin we have the simplest model: an ordinary logistic regression using only the block-level variables. This model pools every neighborhood together, as if the neighborhood distinctions did not matter.
+ +$$ y_{i} = \text{logit}^{-1}\left( \alpha + X_{B}\beta_{B} + \epsilon_{i} \right) $$
+ +where $\epsilon_{i} \sim N(0, \sigma^2)$.
+ +\subsubsection{No Pooling}
+ +The next model has a different but nonrandom intercept for each neighborhood: a fixed effect for that neighborhood. This corresponds to a belief that the neighborhoods each differ from one another.
+ +$$ y_{i} = \text{logit}^{-1}\left( \alpha + X_{B}\beta_{B} + \gamma_{j[i]} + \epsilon_{i} \right) $$
+ +where $\epsilon_i \sim N(0, \sigma^2)$.
+ +\subsubsection{Partial pooling}
+ +The next model has a random intercept for each neighborhood, which corresponds to partially pooling the data. For every neighborhood we use some of the information from the other neighborhoods to estimate its intercept; that is, the intercepts from the previous model are shrunk toward the common mean.
+ +$$ y_{i} = \text{logit}^{-1}\left( \alpha_{j[i]} + X_{B}\beta_{B} + \epsilon_i \right) $$
+ +where $\epsilon_i \sim N(0, \sigma^2)$ and $\alpha_j \sim N(0, \sigma^2_\alpha)$.
+ +\subsubsection{Hierarchical}
+ +The final and most complicated model fit was a hierarchical model that includes the neighborhood-level predictors when estimating the random intercept for each neighborhood.
+ +$$ y_{i} = \text{logit}^{-1}\left( \alpha_{j[i]} + X_{B}\beta_{B} + \epsilon_i \right) $$
+ +where $\epsilon_i \sim N(0, \sigma^2)$ and $\alpha_j \sim N(X_N \beta_N, \sigma^2_\alpha)$, with $X_N$ the matrix of neighborhood-level predictors.
+ + \subsection{Model Comparison}
+Models were compared using AIC and the cross-validated Brier score (equivalent to mean squared error for two-class logistic regression).
+ \section{Results}
+\subsection{Model Parameters}
+ +\subsubsection{No Pooling}
+ +\subsubsection{Complete Pooling}
+ +\subsubsection{Partial Pooling}
+ +\subsubsection{Hierarchical}
+ + + \section{Conclusions} \end{document} \ No newline at end of file
diff --git a/plots.r b/plots.r index 1423e9c..9df551a 100644 --- a/plots.r +++ b/plots.r @@ -53,15 +53,18 @@ watershedDF <- merge(nbhd_df, data.shape@data, by = "id") ## sp_block_data_df <- fortify(sp_block_data) -ggmap(base_map, extent = 'normal') + geom_point(aes(Longitude, Latitude, color = missing), data = plot_data, alpha = .01) +ggmap(base_map, extent = 'normal') + + geom_point(aes(Longitude, Latitude, color = missing), data = plot_data, alpha = .01) + theme_bw() t <- project(as.matrix(nbhd_df[,c('long', 'lat')]),'+proj=merc +lat_0=36.66666666666666+lon_0=-88.33333333333333', inv = TRUE) head(t <- project(as.matrix(nbhd_df[,c('long', 'lat')]),'+proj=merc +lat_0=36.66666666666666+lon_0=-88.33333333333333', inv = TRUE)) -head(t <- project(as.matrix(nbhd_df[,c('long', 'lat')]),'+proj=merc +lat_0=36.66666666666666 +lon_0=-88.33333333333333 +k=0.9999749999999999 +x_0=300000 +y_0=0', inv = TRUE)) +head(t <- project(as.matrix(all_data[,c('Longitude', 'Latitude')]),'+proj=tmerc +lat_0=36.66666666666666 +lon_0=-88.33333333333333 +k=0.9999749999999999 +x_0=300000 +y_0=0 +datum=NAD83 +units=us-ft +no_defs +ellps=GRS80 +towgs84=0,0,0')) +all_data$Longitude_t <- t[,1] +all_data$Latitude_t <- t[,2] ###################### ## Univariate Plots ## ###################### @@ -99,4 +102,16 @@ for (covar in potential_covariates) { ########## all_data$desert_logical <- all_data$desert == 1 -ggplot(all_data, aes(x = Longitude, y = Latitude, color = desert_logical)) + geom_point(alpha = .1) + theme_bw() + +ggplot(all_data, aes(x = Longitude_t, y = Latitude_t, color = desert_logical)) + + geom_point(alpha = .1) + + theme_bw() + + scale_color_manual(values = c('grey', 'black')) + + geom_path(data = nbhd_df, aes(long, lat, group = id, color = NULL)) + +ggplot(all_data, aes(x = Longitude_t, y = Latitude_t, color = NHB_p)) + + geom_point(alpha = .1) + + theme_bw() + + geom_path(data = nbhd_df, aes(long, lat, group = id, color = NULL)) + + labs() +
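The Model Comparison subsection above cites a cross-validated Brier score, but no code for it appears in this diff. A minimal sketch of how that metric might be computed, assuming the model_data_scale data frame and binary desert outcome from model.r.orig; the fold count, formula, and helper name brier_cv are illustrative assumptions, not part of the repository:

## hypothetical sketch; not part of the committed code
library(lme4)

brier_cv <- function(form, data, k = 10) {
  ## assign each row to one of k folds at random
  folds <- sample(rep(1:k, length.out = nrow(data)))
  scores <- numeric(k)
  for (i in 1:k) {
    train <- data[folds != i, ]
    test  <- data[folds == i, ]
    fit <- glmer(form, data = train, family = 'binomial')
    ## allow.new.levels handles neighborhoods absent from the training fold
    p <- predict(fit, newdata = test, type = 'response', allow.new.levels = TRUE)
    ## Brier score: mean squared difference between predicted probability and outcome
    scores[i] <- mean((p - test$desert)^2)
  }
  mean(scores)
}

brier_cv(desert ~ vacant_counts + CTA_counts + (1 | Neighborhood), model_data_scale)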