Skip to content

Commit

Permalink
began work on plotting
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Berry authored and Daniel Berry committed Nov 16, 2016
1 parent d708e4b commit beb6b51
Show file tree
Hide file tree
Showing 4 changed files with 47,339 additions and 47,264 deletions.
18 changes: 17 additions & 1 deletion ETL.r
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,15 @@ socioeconomic$COMMUNITY.AREA.NAME[socioeconomic$COMMUNITY.AREA.NAME == 'Washingt

## Re-spell this one neighborhood name in the race table.
## NOTE(review): presumably done so it matches the spelling used by the
## tables it is merged with later -- confirm.
race$X[race$X == 'Montclare'] <- 'Montclaire'

## Every column except the two identifier columns is converted to numeric
## after stripping commas (presumably thousands separators in the raw file).
to_rep <- setdiff(names(race), c('X', 'Community.Area'))
race[to_rep] <- lapply(
  race[to_rep],
  function(col) as.numeric(gsub(',', '', col))
)

## Build a combined 'Garfield Park' row holding the column means of the
## East and West Garfield Park rows.
## NOTE(review): the counts are averaged, not summed -- confirm that this is
## intended for anything that uses the raw counts.
garfield_parts <- c('East Garfield Park', 'West Garfield Park')
garfield_cols <- c("NHW", "NHB", "NHAM", "NHAS", "NHOTHER", "HISP",
                   "Multiple.Race..", "TOTAL")
tmp <- colMeans(race[race$X %in% garfield_parts, garfield_cols])

## Do not hard-code the destination row: reuse an existing 'Garfield Park'
## row if there is one (keeps re-runs idempotent), otherwise append a new row
## directly after the last one so no existing row is overwritten and no
## all-NA filler rows are created.
garfield_row <- match('Garfield Park', race$X)
if (is.na(garfield_row)) {
  garfield_row <- nrow(race) + 1L
}
race[garfield_row, 'X'] <- 'Garfield Park'
for (col_name in names(tmp)) {
  race[garfield_row, col_name] <- tmp[col_name]
}



## Standardize names:
## Copy the key column to 'Neighborhood' first and drop the old column
## afterwards, so 'Neighborhood' ends up as the last column of public_health.
public_health[['Neighborhood']] <- public_health[['Community.Area.Name']]
public_health[['Community.Area.Name']] <- NULL
Expand All @@ -230,13 +239,21 @@ socioeconomic$COMMUNITY.AREA.NAME <- NULL
## Give the race table the same key column name ('Neighborhood') that the
## merges use: copy 'X' over, then remove 'X'.
race[['Neighborhood']] <- race[['X']]
race[['X']] <- NULL

## For every converted column other than TOTAL, add a '<column>_p' column
## holding that column divided by TOTAL.
share_vars <- setdiff(to_rep, 'TOTAL')
race[paste0(share_vars, '_p')] <- lapply(
  race[share_vars],
  function(counts) counts / race[['TOTAL']]
)

## Left-merge each neighborhood-level table onto block_data in turn
## (public_health, then socioeconomic, then race), matching on 'Neighborhood'
## and keeping every row of the left-hand table (all.x = TRUE).
neighborhood_tables <- list(public_health, socioeconomic, race)
all_data <- Reduce(
  function(joined, lookup) {
    merge(joined, lookup, by = 'Neighborhood', all.x = TRUE)
  },
  neighborhood_tables,
  block_data
)



## Persist the merged table twice: as a CSV file and as a serialized R
## object (restorable with load()) in the file 'all_data'.
write.csv(x = all_data, file = 'all_data.csv')
save(list = 'all_data', file = 'all_data')



## TODO:
## - Block level features:
## - Compute population within a threshold (probably 1 mile due to how long everything takes to run)
Expand All @@ -251,4 +268,3 @@ save(all_data, file = 'all_data')
## - Public Health
## - Cause of death? Diabetes?
## - Public Health Indicators

Binary file modified all_data
Binary file not shown.
Loading

0 comments on commit beb6b51

Please sign in to comment.