Skip to content

Commit

Permalink
Tried to compute distance matrices
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Berry authored and Daniel Berry committed Nov 13, 2016
1 parent 2993e4b commit b976c6b
Showing 1 changed file with 29 additions and 1 deletion.
30 changes: 29 additions & 1 deletion ETL.r
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ library(magrittr)
library(stringr)
library(rgeos)
library(fuzzyjoin)
library(geosphere)

## Look up the current working directory; the relative file names passed to
## read.csv below are resolved against it.
getwd()

## compute.center: given a coordinate table `coords`, returns a length-2 vector
## holding the mean of its first column followed by the mean of its second column.
Expand All @@ -19,6 +20,7 @@ compute.center <- function(coords) {c(mean(coords[,1]), mean(coords[,2]))}


## Load the census-block table (the file name is relative to the working directory).
blocks_raw <- read.csv('CensusBlockTIGER2010.csv', stringsAsFactors = FALSE)
## Keep only blocks where BOTH coordinates are present. The previous filter used
## `any`, which retained rows with just one of Longitude/Latitude filled in; such
## rows still carry an NA into any later distance computation.
blocks_raw <- blocks_raw[complete.cases(blocks_raw[, c('Longitude', 'Latitude')]), ]

## One row per census block: parse each block's geometry string into a coordinate
## matrix, take its per-column mean, and stack the resulting pairs row-wise.
centers <- do.call(
    rbind,
    lapply(blocks_raw$the_geom, function(geom) {
        compute.center(mp.to.matrix(geom))
    })
)

Expand All @@ -29,6 +31,8 @@ vacant_raw <- read.csv('311_Service_Requests_-_Vacant_and_Abandoned_Buildings_Re
stringsAsFactors = FALSE,
skip = 1)

## Keep only reports where BOTH coordinates are present. The previous filter used
## `any`, which retained rows with just one of Longitude/Latitude filled in.
## NOTE(review): this runs before names(vacant_raw) is reassigned below, so it
## relies on the CSV header already supplying 'Longitude' and 'Latitude' columns
## -- confirm against the raw file.
vacant_raw <- vacant_raw[complete.cases(vacant_raw[, c('Longitude', 'Latitude')]), ]

names(vacant_raw) <- c('Type',
'ID',
'Date_Recieved',
Expand All @@ -50,5 +54,29 @@ names(vacant_raw) <- c('Type',
'Longitude',
'Location_string')

## Geographic full join of blocks to vacant-building reports on their coordinates;
## the distance for each matched pair is stored in the 'dist' column.
## NOTE(review): `tmp` is not used by the code visible below -- confirm it is still needed.
tmp <- geo_full_join(blocks_raw, vacant_raw, by = c('Longitude', 'Latitude'), distance_col = 'dist')
## tmp <- geo_full_join(blocks_raw[1:1,], vacant_raw[1:1,], by = c('Longitude', 'Latitude'), distance_col = 'dist')

## system.time(dist_mat <- distm(blocks_raw[1:1000,c('Longitude','Latitude')], vacant_raw[1:1000,c('Longitude','Latitude')]))
## Coordinate matrices with longitude in the first column and latitude in the second.
t1 <- as.matrix(blocks_raw[, c('Longitude', 'Latitude')])
t2 <- as.matrix(vacant_raw[, c('Longitude', 'Latitude')])
## Pairwise great-circle distances: one row per row of blocks_raw, one column per
## row of vacant_raw. Wrapped in system.time to report how long the computation takes.
system.time(dist_mat <- spDists(t1, t2, longlat = TRUE))

## spDists(longlat = TRUE) returns kilometres (unlike distm, which returns metres),
## so convert to miles with 1.609344 km per mile. Dividing by 1609.344 would make
## every distance 1000x too small and the one-mile threshold below meaningless.
dist_mat <- dist_mat / 1.609344

## For each block, the number of vacant-building reports within one mile.
counts <- rowSums(dist_mat <= 1)


## TODO:
## - Block level features:
## - Compute population within a threshold (probably 1 mile due to how long everything takes to run)
## - Compute bus ridership within threshold
## - Compute crimes within a certain threshold
## - Neighborhood level features (load in and join):
## - Demographics
## - Racial breakdown
## - Poverty
## - Income
## - Public Health
## - Cause of death? Diabetes?
## - Public Health Indicators

0 comments on commit b976c6b

Please sign in to comment.