Skip to content

Commit

Permalink
some changes
Browse files Browse the repository at this point in the history
  • Loading branch information
Ubuntu committed Nov 17, 2016
1 parent 5d75815 commit dded1e2
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 66 deletions.
6 changes: 6 additions & 0 deletions ETL.r
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,13 @@ blocks_raw$the_geom <- NULL

nrow(block_data <- merge(blocks_raw, population, by.x = 'TRACT_BLOC', by.y = 'CENSUS.BLOCK', all.x = TRUE))

##############################
## NEIGHBORHOOD INFORMATION ##
##############################

library(data.table)

crime <- fread('rows.csv')

library(ggplot2)

Expand Down
132 changes: 69 additions & 63 deletions model.r
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,15 @@ summary(pp)
## Hierarchical ##
##################

mlm <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + (1 | Neighborhood),
data = model_data,
family = 'binomial')
summary(mlm)
## mlm <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + (1 | Neighborhood),
## data = model_data,
## family = 'binomial')
## summary(mlm)

mlm_2 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + (1 | Neighborhood),
data = model_data,
family = 'binomial')
summary(mlm_2)
## mlm_2 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + (1 | Neighborhood),
## data = model_data,
## family = 'binomial')
## summary(mlm_2)


## rescale variables
Expand All @@ -53,59 +53,59 @@ potential_covariates <- setdiff(names(all_data), exclude)
model_data_scale <- model_data
for (var in potential_covariates) {model_data_scale[var] <- as.numeric(scale(model_data[var]))}

mlm_c <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + (1 | Neighborhood),
data = model_data_scale,
family = 'binomial')
summary(mlm_c)

mlm_c_2 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + (1 | Neighborhood),
data = model_data_scale,
family = 'binomial')
summary(mlm_c_2)

mlm_c_3 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + NHB_p + PER.CAPITA.INCOME + (1 | Neighborhood),
data = model_data_scale,
family = 'binomial')
summary(mlm_c_3)

mlm_c_4 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + NHB_p + PER.CAPITA.INCOME + HISP_p + (1 | Neighborhood),
data = model_data_scale,
family = 'binomial')
summary(mlm_c_4)

mlm_c_5 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + NHB_p + PER.CAPITA.INCOME + HISP_p + TOTAL.POPULATION + (1 | Neighborhood),
data = model_data_scale,
family = 'binomial')
summary(mlm_c_5)

mlm_c_6 <- glmer(desert ~ CTA_counts + vacant_counts + Dependency + NHB_p + TOTAL.POPULATION + (1 | Neighborhood),
data = model_data_scale,
family = 'binomial')
summary(mlm_c_6)

mlm_c_7 <- glmer(desert ~ CTA_counts + vacant_counts + Dependency + HISP_p + TOTAL.POPULATION + (1 | Neighborhood),
data = model_data_scale,
family = 'binomial')
summary(mlm_c_7)

mlm_c_8 <- glmer(desert ~ CTA_counts + vacant_counts + Dependency + Cancer..All.Sites. + HISP_p + TOTAL.POPULATION + (1 | Neighborhood),
data = model_data_scale,
family = 'binomial')
summary(mlm_c_8)

mlm_c_9 <- glmer(desert ~ CTA_counts + vacant_counts + Dependency + Cancer..All.Sites. + NHB_p + TOTAL.POPULATION + (1 | Neighborhood),
data = model_data_scale,
family = 'binomial')
summary(mlm_c_9)

mlm_c_10 <- glmer(desert ~ CTA_counts + vacant_counts + Dependency + Cancer..All.Sites. + Diabetes.related + TOTAL.POPULATION + (1 | Neighborhood),
data = model_data_scale,
family = 'binomial')
summary(mlm_c_10)
## mlm_c <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + (1 | Neighborhood),
## data = model_data_scale,
## family = 'binomial')
## summary(mlm_c)

## mlm_c_2 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + (1 | Neighborhood),
## data = model_data_scale,
## family = 'binomial')
## summary(mlm_c_2)

## mlm_c_3 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + NHB_p + PER.CAPITA.INCOME + (1 | Neighborhood),
## data = model_data_scale,
## family = 'binomial')
## summary(mlm_c_3)

## mlm_c_4 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + NHB_p + PER.CAPITA.INCOME + HISP_p + (1 | Neighborhood),
## data = model_data_scale,
## family = 'binomial')
## summary(mlm_c_4)

## mlm_c_5 <- glmer(desert ~ CTA_counts + vacant_counts + Diabetes.related + Below.Poverty.Level + NHB_p + PER.CAPITA.INCOME + HISP_p + TOTAL.POPULATION + (1 | Neighborhood),
## data = model_data_scale,
## family = 'binomial')
## summary(mlm_c_5)

## mlm_c_6 <- glmer(desert ~ CTA_counts + vacant_counts + Dependency + NHB_p + TOTAL.POPULATION + (1 | Neighborhood),
## data = model_data_scale,
## family = 'binomial')
## summary(mlm_c_6)

## mlm_c_7 <- glmer(desert ~ CTA_counts + vacant_counts + Dependency + HISP_p + TOTAL.POPULATION + (1 | Neighborhood),
## data = model_data_scale,
## family = 'binomial')
## summary(mlm_c_7)

## mlm_c_8 <- glmer(desert ~ CTA_counts + vacant_counts + Dependency + Cancer..All.Sites. + HISP_p + TOTAL.POPULATION + (1 | Neighborhood),
## data = model_data_scale,
## family = 'binomial')
## summary(mlm_c_8)

## mlm_c_9 <- glmer(desert ~ CTA_counts + vacant_counts + Dependency + Cancer..All.Sites. + NHB_p + TOTAL.POPULATION + (1 | Neighborhood),
## data = model_data_scale,
## family = 'binomial')
## summary(mlm_c_9)

## mlm_c_10 <- glmer(desert ~ CTA_counts + vacant_counts + Dependency + Cancer..All.Sites. + Diabetes.related + TOTAL.POPULATION + (1 | Neighborhood),
## data = model_data_scale,
## family = 'binomial')
## summary(mlm_c_10)

cor(model_data[, c('Diabetes.related', 'NHB_p', 'NHW_p', 'HISP_p', 'PER.CAPITA.INCOME')])

search_covariates <- setdiff(potential_covariates, c('vacant_counts', 'CTA_counts'))
search_covariates <- setdiff(potential_covariates, c('vacant_counts', 'CTA_counts', 'Community.Area.Number'))

best_model <- glmer(desert ~ vacant_counts + CTA_counts + (1 | Neighborhood),
data = model_data_scale,
Expand All @@ -117,7 +117,7 @@ out_vars <- search_covariates
library(parallel)
library(doMC)

registerDoMC(3)
## Leave three cores free for the rest of the system, but always register at
## least one worker: detectCores() - 3 is <= 0 on machines with three or fewer
## cores, and detectCores() itself can return NA when the count is unknown.
registerDoMC(max(1L, detectCores() - 3L, na.rm = TRUE))

old_aic <- AIC(best_model)

Expand All @@ -134,13 +134,19 @@ while(TRUE) {
search_results <- foreach(i=1:length(out_vars), .combine = 'rbind') %dopar% fit_model(i)
min_aic <- which.min(search_results[,2])
if (min_aic < old_aic) {
print(paste('ADDING:',out_vars[i]))
in_vars <- c(in_vars, out_vars[i])
out_vars <- setdiff(out_vars, out_vars[i])

print(paste('ADDING:',out_vars[min_aic]))
in_vars <- c(in_vars, out_vars[min_aic])
out_vars <- setdiff(out_vars, out_vars[min_aic])
print(search_results[min_aic,])
}
else {
break
}
}
## Report the covariates accumulated in in_vars on a single line.
## in_vars is collapsed by the inner paste() first; passing collapse to the
## outer call instead would repeat the 'Final model:' prefix for every variable.
print(paste('Final model:', paste(in_vars, collapse = ', ')))

## Refit the selected specification: the two baseline predictors, every
## covariate in in_vars, and a random intercept per Neighborhood.
form <- as.formula(paste('desert ~ vacant_counts + CTA_counts +',
                         paste(in_vars, collapse = '+'),
                         '+(1|Neighborhood)'))
final_model <- glmer(form, data = model_data_scale, family = 'binomial')

## save() interprets unnamed arguments as names of objects to store, so the
## output path has to be given as file =; without it the call stops with
## "'file' must be specified" and nothing is written.
save(final_model, file = 'final_model')
9 changes: 6 additions & 3 deletions plots.r
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,16 @@ nbhd_df <- merge(nbhd_points, data.shape@data, by = 'id')
ggplot(nbhd_df, aes(long, lat, group = id)) + geom_path()

# add to data a new column termed "id" composed of the rownames of data
dataProjected@data$id <- rownames(dataProjected@data)
data.shape@data$id <- rownames(data.shape@data)

# create a data.frame from our spatial object
watershedPoints <- fortify(dataProjected, region = "id")
watershedPoints <- fortify(data.shape, region = "id")

# merge the "fortified" data with the data from our spatial object
watershedDF <- merge(watershedPoints, dataProjected@data, by = "id")
watershedDF <- merge(nbhd_df, data.shape@data, by = "id")




## sp_block_data <- block_data
## coordinates(sp_block_data) <- ~ Longitude + Latitude
Expand Down

0 comments on commit dded1e2

Please sign in to comment.