Skip to content

Commit

Permalink
Continued search, began work on including crime
Browse files Browse the repository at this point in the history
  • Loading branch information
Ubuntu committed Nov 20, 2016
1 parent a3690ee commit a811fd1
Show file tree
Hide file tree
Showing 3 changed files with 501 additions and 26 deletions.
14 changes: 13 additions & 1 deletion ETL.r
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,18 @@ CTA_counts <- read.csv('CTA_counts.csv')

blocks_raw$CTA_counts <- CTA_counts$x

## Work in progress: per-block crime counts.
## Read the crime records with data.table's fread().
library(data.table)
crimes <- fread('../rows.csv')

## Coordinate matrix (Latitude, Longitude) of the crime records.
## NOTE(review): this reads `crime_2009`, but the table loaded above is named
## `crimes`; `crime_2009` is not defined in the visible code -- confirm where it
## comes from.
t2 <- as.matrix(crime_2009[,c('Latitude', 'Longitude'), with = FALSE])
## For each GEOID10 group of blocks_raw, take the coordinates of the group's
## first row and pass them, together with the crime coordinates, to spDists().
tmp <- ddply(blocks_raw, .(GEOID10), function(df) {
t1 <- as.matrix(df[1,c('Latitude', 'Longitude')])

dists <- spDists(t1,t2)

## NOTE(review): `dists` is never used, and nrow(crime_2009[]) is the row count
## of the whole table, so every group gets the same value. Presumably the
## intent is to count crimes within some distance of the block -- TODO finish.
count <- nrow(crime_2009[])
})

## Grocery store listing (2011); strings are kept as character vectors.
groceries <- read.csv(
  'food-deserts-master/data/Grocery_Stores_-_2011.csv',
  stringsAsFactors = FALSE
)
## Drop liquor stores: grep(..., invert = TRUE) yields the row indices whose
## lower-cased STORE.NAME does not contain 'liquor'.
groceries <- groceries[
  grep('liquor', tolower(groceries$STORE.NAME), invert = TRUE),
]
Expand Down Expand Up @@ -131,7 +143,7 @@ nrow(block_data <- merge(blocks_raw, population, by.x = 'TRACT_BLOC', by.y = 'CE

library(data.table)

## NOTE(review): both lines assign to `crime`, so the second read replaces the
## first. The page header reports 1 deletion for ETL.r, so presumably this pair
## is the removed ('rows.csv') and added ('../rows.csv') line of the diff --
## confirm that only the '../rows.csv' read exists in the committed file.
crime <- fread('rows.csv')
crime <- fread('../rows.csv')

library(ggplot2)

Expand Down
73 changes: 48 additions & 25 deletions model.r
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

########################
## Modeling code file ##
########################
Expand Down Expand Up @@ -130,6 +131,26 @@ fit_model <- function(i) {
return(c(i, AIC(model)))
}

## Baseline fit for the variable search: a glmer() model of `desert` with a
## binomial family and a (1 | Neighborhood) grouping term, fitted on
## model_data_scale. Terms are listed one per line, in the original order.
model <- glmer(
  desert ~ CTA_counts +
    vacant_counts +
    Gonorrhea.in.Females +
    Cancer..All.Sites. +
    TOTAL.POPULATION +
    NHAS +
    Dependency +
    Childhood.Lead.Poisoning +
    Prenatal.Care.Beginning.in.First.Trimester +
    Gonorrhea.in.Males +
    NHAM_p +
    Multiple.Race.. +
    Stroke..Cerebrovascular.Disease. +
    Firearm.related +
    Tuberculosis +
    NHW_p +
    Teen.Birth.Rate +
    No.High.School.Diploma +
    Lung.Cancer +
    (1 | Neighborhood),
  data = model_data_scale,
  family = 'binomial'
)

summary(model)

## Variables treated as already selected; everything else in
## search_covariates remains a candidate (out_vars).
## NOTE(review): the last five entries (Colorectal.Cancer, HISP_p,
## Below.Poverty.Level, Breast.cancer.in.females, NHAM) are not terms of the
## formula fitted above, so old_aic below comes from a model without them --
## confirm this is intended.
in_vars <- c(
  'Gonorrhea.in.Females',
  'Cancer..All.Sites.',
  'TOTAL.POPULATION',
  'NHAS',
  'Dependency',
  'Childhood.Lead.Poisoning',
  'Prenatal.Care.Beginning.in.First.Trimester',
  'Gonorrhea.in.Males',
  'NHAM_p',
  'Multiple.Race..',
  'Stroke..Cerebrovascular.Disease.',
  'Firearm.related',
  'Tuberculosis',
  'NHW_p',
  'Teen.Birth.Rate',
  'No.High.School.Diploma',
  'Lung.Cancer',
  'Colorectal.Cancer',
  'HISP_p',
  'Below.Poverty.Level',
  'Breast.cancer.in.females',
  'NHAM'
)
out_vars <- setdiff(search_covariates, in_vars)


## AIC of the baseline model, kept for comparison against candidate models.
old_aic <- AIC(model)


while(TRUE) {
search_results <- foreach(i=1:length(out_vars), .combine = 'rbind') %dopar% fit_model(i)
min_aic <- which.min(search_results[,2])
Expand All @@ -145,33 +166,35 @@ while(TRUE) {
}
## Report the selected variables on one line. The names are collapsed first so
## the 'Final model:' prefix appears once; passing in_vars straight to paste()
## with `collapse` repeated the prefix in front of every variable.
print(paste('Final model:', paste(in_vars, collapse = ', ')))

## Refit of the glmer() model of `desert` (binomial family, (1 | Neighborhood)
## grouping term) on model_data_scale. Terms are listed one per line, in the
## original order.
model <- glmer(
  desert ~ CTA_counts +
    vacant_counts +
    Gonorrhea.in.Females +
    Cancer..All.Sites. +
    TOTAL.POPULATION +
    NHAS +
    Dependency +
    Childhood.Lead.Poisoning +
    Prenatal.Care.Beginning.in.First.Trimester +
    Gonorrhea.in.Males +
    NHAM_p +
    Multiple.Race.. +
    Stroke..Cerebrovascular.Disease. +
    Firearm.related +
    Tuberculosis +
    NHW_p +
    Teen.Birth.Rate +
    No.High.School.Diploma +
    Lung.Cancer +
    (1 | Neighborhood),
  data = model_data_scale,
  family = 'binomial'
)

summary(model)

## Variables treated as already selected; the remaining entries of
## search_covariates become the candidate pool (out_vars).
in_vars <- c(
  'Gonorrhea.in.Females',
  'Cancer..All.Sites.',
  'TOTAL.POPULATION',
  'NHAS',
  'Dependency',
  'Childhood.Lead.Poisoning',
  'Prenatal.Care.Beginning.in.First.Trimester',
  'Gonorrhea.in.Males',
  'NHAM_p',
  'Multiple.Race..',
  'Stroke..Cerebrovascular.Disease.',
  'Firearm.related',
  'Tuberculosis',
  'NHW_p',
  'Teen.Birth.Rate',
  'No.High.School.Diploma',
  'Lung.Cancer'
)
out_vars <- setdiff(search_covariates, in_vars)
## model <- glmer(desert ~ CTA_counts + vacant_counts + Gonorrhea.in.Females + Cancer..All.Sites. + TOTAL.POPULATION + NHAS +
## Dependency +Childhood.Lead.Poisoning + Prenatal.Care.Beginning.in.First.Trimester + Gonorrhea.in.Males
## + NHAM_p + Multiple.Race.. + Stroke..Cerebrovascular.Disease. + Firearm.related + Tuberculosis + NHW_p +
## Teen.Birth.Rate + No.High.School.Diploma + Lung.Cancer +
## (1|Neighborhood),
## data = model_data_scale,
## family = 'binomial')

## summary(model)

## AIC of the current model: the value a candidate model has to beat.
old_aic <- AIC(model)
## in_vars <- c("Gonorrhea.in.Females" , 'Cancer..All.Sites.' , 'TOTAL.POPULATION' , 'NHAS' ,
## 'Dependency' ,'Childhood.Lead.Poisoning' , 'Prenatal.Care.Beginning.in.First.Trimester' , 'Gonorrhea.in.Males'
## , 'NHAM_p' , 'Multiple.Race..' , 'Stroke..Cerebrovascular.Disease.' , 'Firearm.related' , 'Tuberculosis' , 'NHW_p' ,
## 'Teen.Birth.Rate' , 'No.High.School.Diploma' , 'Lung.Cancer')
## out_vars <- setdiff(search_covariates, in_vars)

## Single search step: fit one candidate per entry of out_vars and, if the best
## candidate has a lower AIC than old_aic, move that variable from out_vars to
## in_vars. fit_model(i) returns c(i, AIC), so column 2 of the combined result
## holds the candidate AICs.
if (length(out_vars) == 0) {
  ## seq_along() avoids the 1:0 trap, but there is then nothing to fit.
  print('No candidate variables left in out_vars')
} else {
  search_results <- foreach(i = seq_along(out_vars), .combine = 'rbind') %dopar% fit_model(i)
  ## With a single candidate the result may come back as a plain vector
  ## (nothing to rbind); make sure the [, 2] indexing below always works.
  if (is.null(dim(search_results))) {
    search_results <- matrix(search_results, nrow = 1)
  }
  ## min_aic is the ROW INDEX of the best candidate, not an AIC value.
  min_aic <- which.min(search_results[, 2])
  ## Compare AIC against AIC; the index itself must not be compared to old_aic.
  if (search_results[min_aic, 2] < old_aic) {
    print(paste('ADDING:', out_vars[min_aic]))
    in_vars <- c(in_vars, out_vars[min_aic])
    out_vars <- setdiff(out_vars, out_vars[min_aic])
    print(search_results[min_aic, ])
  } else {
    ## No enclosing loop here, so report instead of calling break (which is an
    ## error outside a loop). `else` sits on the closing-brace line because a
    ## top-level `else` on its own line does not parse.
    print('No candidate lowers the AIC; in_vars left unchanged')
  }
}

## old_aic <- AIC(model)

## search_results <- foreach(i=1:length(out_vars), .combine = 'rbind') %dopar% fit_model(i)
## min_aic <- which.min(search_results[,2])
## if (min_aic < old_aic) {
## print(paste('ADDING:',out_vars[min_aic]))
## in_vars <- c(in_vars, out_vars[min_aic])
## out_vars <- setdiff(out_vars, out_vars[min_aic])
## print(search_results[min_aic,])
## }
## else {
## break
## }
Loading

0 comments on commit a811fd1

Please sign in to comment.