diff --git a/auto/paper.el b/auto/paper.el index 81185ba..7420d9f 100644 --- a/auto/paper.el +++ b/auto/paper.el @@ -8,11 +8,18 @@ "report" "rep10" "geometry" - "amsmath") + "amsmath" + "graphicx") (LaTeX-add-labels + "fig:desert" + "fig:black" + "fig:white" + "fig:income" + "fig:vacant" "cpresult" "npresult" "ppresult" + "mlm" "AICs" "MSEs")) :latex) diff --git a/model.r b/model.r index c81feb3..40201fd 100644 --- a/model.r +++ b/model.r @@ -297,6 +297,7 @@ model3 <- glmer(desert ~ CTA_counts + model4 <- glmer(desert ~ CTA_counts + vacant_counts + crime + + Cancer..All.Sites. + TOTAL.POPULATION + NHAS + Dependency + @@ -316,6 +317,108 @@ model4 <- glmer(desert ~ CTA_counts + verbose = TRUE, control = glmerControl(calc.derivs = FALSE, optCtrl=list(maxfun=5000))) +model5 <- glmer(desert ~ . - + Neighborhood - + TRACT_BLOC - + STATEFP10 - + COUNTYFP10 - + TRACTCE10 - + BLOCKCE10 - + GEOID10 - + NAME10 - + Latitude - + Longitude - + Birth.Rate - + Community.Area.Number - + Childhood.Blood.Lead.Level.Screening - + Childhood.Lead.Poisoning - + Community.Area.x - + Community.Area.y - + Gonorrhea.in.Females - + Gonorrhea.in.Males - + Prostate.Cancer.in.Males - + Breast.cancer.in.females - + Colorectal.Cancer - + TOTAL - + Multiple.Race.. 
- + PERCENT.HOUSEHOLDS.BELOW.POVERTY - + PERCENT.OF.HOUSING.CROWDED - + PERCENT.AGED.16..UNEMPLOYED - + PERCENT.AGED.25..WITHOUT.HIGH.SCHOOL.DIPLOMA - + PER.CAPITA.INCOME - + PERCENT.AGED.UNDER.18.OR.OVER.64 - + HARDSHIP.INDEX - + nearest_supermarket - + Below.Poverty.Level - + Prenatal.Care.Beginning.in.First.Trimester - + Preterm.Births - + Low.Birth.Weight - + Crowded.Housing - + General.Fertility.Rate - + Infant.Mortality.Rate - + NHW - + NHW_p - + NHOTHER - + NHOTHER_p - + Multiple.Race.._p + + (1|Neighborhood), + data = model_data_scale, + family = 'binomial', + verbose = 2, + control = glmerControl(calc.derivs = FALSE, optCtrl = list(maxfun = 5000))) + +summary(model6 <- glmer(desert ~ CTA_counts + crime + vacant_counts + + Cancer..All.Sites. + + Diabetes.related + + Dependency + + TOTAL.POPULATION + + (1 | Neighborhood), + data = model_data_scale, + family = 'binomial', + verbose = 2, + control = glmerControl(calc.derivs = FALSE, optCtrl = list(maxfun = 1000)))) + +## Gonorrhea in females +## Cancer all sites +## Total population +## NHAS +## Dependency +## Childhood lead poisoning +## Prenatal care beginning in first trimester +## Gonorrhea in males +## NHAM_p +## Multiple.Race.. +## Stroke..Cerebrovascular.Disease +## Firearm.related +## Tuberculosis +## NHW_p +## Teen birth rate +## No.high.school.diploma +## Lung.cancer + +summary(model7 <- glmer(desert ~ CTA_counts + crime + vacant_counts + + Cancer..All.Sites. + + TOTAL.POPULATION + + Prenatal.Care.Beginning.in.First.Trimester + + NHAM_p + + Multiple.Race.. + + Stroke..Cerebrovascular.Disease. 
+ + Tuberculosis + + NHW_p + + Teen.Birth.Rate + + No.High.School.Diploma + + Lung.Cancer + + (1 | Neighborhood), + data = model_data_scale, + family = 'binomial', + verbose = 2, + control = glmerControl(calc.derivs = FALSE, optCtrl = list(maxfun = 2000)) + )) + +summary(pp <- glmer(desert ~ CTA_counts + vacant_counts + crime + (1 | Neighborhood), data = model_data_scale, family = 'binomial', verbose = 2)) + +summary(cp <- glm(desert ~ CTA_counts + vacant_counts + crime, data = model_data_scale, family = 'binomial')) + t_data <- complete_datas2[[1]] model4 <- glmer(desert ~ CTA_counts + @@ -325,7 +428,7 @@ model4 <- glmer(desert ~ CTA_counts + Dependency + Childhood.Lead.Poisoning + Prenatal.Care.Beginning.in.First.Trimester + - NHAM_p +.[.---------------[-]] + NHAM_p + Multiple.Race.. + Stroke..Cerebrovascular.Disease. + Tuberculosis + @@ -357,26 +460,37 @@ for (i in 1:10) { pp <- glmer(desert ~ CTA_counts + vacant_counts + crime + (1 | Neighborhood), data = train, family = 'binomial') print(paste('AIC pp:', AIC(pp))) - mlm <- glmer(desert ~ CTA_counts + - vacant_counts + - crime + - TOTAL.POPULATION + - NHAS + - Dependency + - Childhood.Lead.Poisoning + - Prenatal.Care.Beginning.in.First.Trimester + - NHAM_p + - Multiple.Race.. + - Stroke..Cerebrovascular.Disease. + - Tuberculosis + - Teen.Birth.Rate + - No.High.School.Diploma + - Lung.Cancer + - Colorectal.Cancer + - (1|Neighborhood), - data = train, - family = 'binomial', - control = glmerControl(calc.derivs = FALSE, optCtrl=list(maxfun=1000))) + ## mlm <- glmer(desert ~ CTA_counts + + ## vacant_counts + + ## crime + + ## TOTAL.POPULATION + + ## NHAS + + ## Dependency + + ## Childhood.Lead.Poisoning + + ## Prenatal.Care.Beginning.in.First.Trimester + + ## NHAM_p + + ## Multiple.Race.. + + ## Stroke..Cerebrovascular.Disease. 
+ + ## Tuberculosis + + ## Teen.Birth.Rate + + ## No.High.School.Diploma + + ## Lung.Cancer + + ## Colorectal.Cancer + + ## (1|Neighborhood), + ## data = train, + ## family = 'binomial', + ## control = glmerControl(calc.derivs = FALSE, optCtrl=list(maxfun=1000))) + + mlm <- glmer(desert ~ CTA_counts + crime + vacant_counts + + Cancer..All.Sites. + + Diabetes.related + + Dependency + + TOTAL.POPULATION + + (1 | Neighborhood), + data = train, + family = 'binomial', + control = glmerControl(calc.derivs = FALSE, optCtrl = list(maxfun = 1000))) + print(paste('AIC mlm:', AIC(mlm))) @@ -464,3 +578,47 @@ print(paste('Final model:', in_vars, collapse = ', ')) ## else { ## break ## } + + +cp <- glm(desert ~ CTA_counts + vacant_counts + crime, family = 'binomial', data = model_data_scale) +print(paste('AIC cp:', AIC(cp))) + +np <- glm(desert ~ CTA_counts + vacant_counts + crime + Neighborhood, family = 'binomial', data = model_data_scale) +print(paste('AIC np:', AIC(np))) + +pp <- glmer(desert ~ CTA_counts + vacant_counts + crime + (1 | Neighborhood), data = model_data_scale, family = 'binomial') +print(paste('AIC pp:', AIC(pp))) + +## mlm <- glmer(desert ~ CTA_counts + +## vacant_counts + +## crime + +## TOTAL.POPULATION + +## NHAS + +## Dependency + +## Childhood.Lead.Poisoning + +## Prenatal.Care.Beginning.in.First.Trimester + +## NHAM_p + +## Multiple.Race.. + +## Stroke..Cerebrovascular.Disease. + +## Tuberculosis + +## Teen.Birth.Rate + +## No.High.School.Diploma + +## Lung.Cancer + +## Colorectal.Cancer + +## (1|Neighborhood), +## data = model_data_scale, +## family = 'binomial', +## control = glmerControl(calc.derivs = FALSE, optCtrl=list(maxfun=1000))) + +mlm <- glmer(desert ~ CTA_counts + crime + vacant_counts + + Cancer..All.Sites. 
+ + Diabetes.related + + Dependency + + TOTAL.POPULATION + + (1 | Neighborhood), + data = model_data_scale, + family = 'binomial', + control = glmerControl(calc.derivs = FALSE, optCtrl = list(maxfun = 1000))) + + +print(paste('AIC mlm:', AIC(mlm))) diff --git a/paper.aux b/paper.aux index c44e753..fba9a62 100644 --- a/paper.aux +++ b/paper.aux @@ -1,17 +1,30 @@ \relax +\@writefile{toc}{\contentsline {paragraph}{ Crimes 2001 - present}{1}} \@writefile{toc}{\contentsline {paragraph}{ 311 Service Requests: Vacant Buildings}{1}} -\@writefile{toc}{\contentsline {paragraph}{ CTA Ridership: Avg. Weekly Boardings during October 2010}{1}} -\@writefile{toc}{\contentsline {paragraph}{ Census Block Population }{1}} -\@writefile{toc}{\contentsline {paragraph}{ Public Health Statistics: selected public health indicators by Chicago community area}{1}} +\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Food Desert Locations in Chicago}}{2}} +\newlabel{fig:desert}{{1}{2}} +\@writefile{toc}{\contentsline {paragraph}{ CTA Ridership: Avg. 
Weekly Boardings during October 2010}{2}} +\@writefile{toc}{\contentsline {paragraph}{ Census Block Population }{2}} +\@writefile{toc}{\contentsline {paragraph}{ Public Health Statistics: selected public health indicators by Chicago community area}{2}} \@writefile{toc}{\contentsline {paragraph}{ Census Data: Selected socioeconomic indicators }{2}} -\@writefile{toc}{\contentsline {paragraph}{ Race by Community Area }{2}} -\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Complete pooling model summary}}{4}} -\newlabel{cpresult}{{1}{4}} -\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces No pooling model summary}}{5}} -\newlabel{npresult}{{2}{5}} -\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Partial pooling model summary}}{6}} -\newlabel{ppresult}{{3}{6}} -\@writefile{lot}{\contentsline {table}{\numberline {4}{\ignorespaces Model AICs}}{6}} -\newlabel{AICs}{{4}{6}} -\@writefile{lot}{\contentsline {table}{\numberline {5}{\ignorespaces Model Cross Validated MSEs}}{6}} -\newlabel{MSEs}{{5}{6}} +\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Neighborhoods by Black Percentage}}{3}} +\newlabel{fig:black}{{2}{3}} +\@writefile{toc}{\contentsline {paragraph}{ Race by Community Area }{3}} +\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Neighborhoods by White Percentage}}{4}} +\newlabel{fig:white}{{3}{4}} +\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Neighborhoods by Per Capita Income}}{5}} +\newlabel{fig:income}{{4}{5}} +\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Neighborhoods by Number Vacant Buildings with 1 Mile Radius}}{6}} +\newlabel{fig:vacant}{{5}{6}} +\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Complete pooling model summary}}{7}} +\newlabel{cpresult}{{1}{7}} +\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces No pooling model summary}}{7}} +\newlabel{npresult}{{2}{7}} 
+\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Partial pooling model summary}}{8}} +\newlabel{ppresult}{{3}{8}} +\@writefile{lot}{\contentsline {table}{\numberline {4}{\ignorespaces Hierarchical Model Summary}}{8}} +\newlabel{mlm}{{4}{8}} +\@writefile{lot}{\contentsline {table}{\numberline {5}{\ignorespaces Model AICs}}{9}} +\newlabel{AICs}{{5}{9}} +\@writefile{lot}{\contentsline {table}{\numberline {6}{\ignorespaces Model Cross Validated MSEs}}{9}} +\newlabel{MSEs}{{6}{9}} diff --git a/paper.log b/paper.log index 292e067..ba90791 100644 --- a/paper.log +++ b/paper.log @@ -1,4 +1,4 @@ -This is pdfTeX, Version 3.14159265-2.6-1.40.17 (TeX Live 2016) (preloaded format=pdflatex 2016.5.22) 21 NOV 2016 15:12 +This is pdfTeX, Version 3.14159265-2.6-1.40.17 (TeX Live 2016) (preloaded format=pdflatex 2016.5.22) 22 NOV 2016 15:25 entering extended mode restricted \write18 enabled. file:line:error style messages enabled. @@ -109,21 +109,50 @@ LaTeX Font Info: Redeclaring font encoding OMS on input line 635. \mathdisplay@stack=\toks20 LaTeX Info: Redefining \[ on input line 2739. LaTeX Info: Redefining \] on input line 2740. -) (./paper.aux) +) +(/usr/local/texlive/2016/texmf-dist/tex/latex/graphics/graphicx.sty +Package: graphicx 2014/10/28 v1.0g Enhanced LaTeX Graphics (DPC,SPQR) + +(/usr/local/texlive/2016/texmf-dist/tex/latex/graphics/graphics.sty +Package: graphics 2016/05/09 v1.0r Standard LaTeX Graphics (DPC,SPQR) + +(/usr/local/texlive/2016/texmf-dist/tex/latex/graphics/trig.sty +Package: trig 2016/01/03 v1.10 sin cos tan (DPC) +) +(/usr/local/texlive/2016/texmf-dist/tex/latex/graphics-cfg/graphics.cfg +File: graphics.cfg 2016/01/02 v1.10 sample graphics configuration +) +Package graphics Info: Driver file: pdftex.def on input line 96. 
+ +(/usr/local/texlive/2016/texmf-dist/tex/latex/pdftex-def/pdftex.def +File: pdftex.def 2011/05/27 v0.06d Graphics/color for pdfTeX + +(/usr/local/texlive/2016/texmf-dist/tex/generic/oberdiek/infwarerr.sty +Package: infwarerr 2016/05/16 v1.4 Providing info/warning/error messages (HO) +) +(/usr/local/texlive/2016/texmf-dist/tex/generic/oberdiek/ltxcmds.sty +Package: ltxcmds 2016/05/16 v1.23 LaTeX kernel commands for general use (HO) +) +\Gread@gobject=\count103 +)) +\Gin@req@height=\dimen120 +\Gin@req@width=\dimen121 +) +(./paper.aux) \openout1 = `paper.aux'. -LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 10. -LaTeX Font Info: ... okay on input line 10. -LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 10. -LaTeX Font Info: ... okay on input line 10. -LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 10. -LaTeX Font Info: ... okay on input line 10. -LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 10. -LaTeX Font Info: ... okay on input line 10. -LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 10. -LaTeX Font Info: ... okay on input line 10. -LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 10. -LaTeX Font Info: ... okay on input line 10. +LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 12. +LaTeX Font Info: ... okay on input line 12. +LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 12. +LaTeX Font Info: ... okay on input line 12. +LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 12. +LaTeX Font Info: ... okay on input line 12. +LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 12. +LaTeX Font Info: ... okay on input line 12. +LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 12. +LaTeX Font Info: ... okay on input line 12. +LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 12. +LaTeX Font Info: ... okay on input line 12. 
*geometry* driver: auto-detecting *geometry* detected driver: pdftex @@ -159,44 +188,146 @@ LaTeX Font Info: ... okay on input line 10. * \@reversemarginfalse * (1in=72.27pt=25.4mm, 1cm=28.453pt) +(/usr/local/texlive/2016/texmf-dist/tex/context/base/mkii/supp-pdf.mkii +[Loading MPS to PDF converter (version 2006.09.02).] +\scratchcounter=\count104 +\scratchdimen=\dimen122 +\scratchbox=\box28 +\nofMPsegments=\count105 +\nofMParguments=\count106 +\everyMPshowfont=\toks21 +\MPscratchCnt=\count107 +\MPscratchDim=\dimen123 +\MPnumerator=\count108 +\makeMPintoPDFobject=\count109 +\everyMPtoPDFconversion=\toks22 +) (/usr/local/texlive/2016/texmf-dist/tex/generic/oberdiek/pdftexcmds.sty +Package: pdftexcmds 2016/05/10 v0.21 Utility functions of pdfTeX for LuaTeX (HO +) + +(/usr/local/texlive/2016/texmf-dist/tex/generic/oberdiek/ifluatex.sty +Package: ifluatex 2016/05/16 v1.4 Provides the ifluatex switch (HO) +Package ifluatex Info: LuaTeX not detected. +) +Package pdftexcmds Info: LuaTeX not detected. +Package pdftexcmds Info: \pdf@primitive is available. +Package pdftexcmds Info: \pdf@ifprimitive is available. +Package pdftexcmds Info: \pdfdraftmode found. 
+) +(/usr/local/texlive/2016/texmf-dist/tex/latex/oberdiek/epstopdf-base.sty +Package: epstopdf-base 2016/05/15 v2.6 Base part for package epstopdf + +(/usr/local/texlive/2016/texmf-dist/tex/latex/oberdiek/grfext.sty +Package: grfext 2016/05/16 v1.2 Manage graphics extensions (HO) + +(/usr/local/texlive/2016/texmf-dist/tex/generic/oberdiek/kvdefinekeys.sty +Package: kvdefinekeys 2016/05/16 v1.4 Define keys (HO) +)) +(/usr/local/texlive/2016/texmf-dist/tex/latex/oberdiek/kvoptions.sty +Package: kvoptions 2016/05/16 v3.12 Key value format for package options (HO) + +(/usr/local/texlive/2016/texmf-dist/tex/generic/oberdiek/kvsetkeys.sty +Package: kvsetkeys 2016/05/16 v1.17 Key value parser (HO) + +(/usr/local/texlive/2016/texmf-dist/tex/generic/oberdiek/etexcmds.sty +Package: etexcmds 2016/05/16 v1.6 Avoid name clashes with e-TeX commands (HO) +Package etexcmds Info: Could not find \expanded. +(etexcmds) That can mean that you are not using pdfTeX 1.50 or +(etexcmds) that some package has redefined \expanded. +(etexcmds) In the latter case, load this package earlier. +))) +Package grfext Info: Graphics extension search list: +(grfext) [.png,.pdf,.jpg,.mps,.jpeg,.jbig2,.jb2,.PNG,.PDF,.JPG,.JPE +G,.JBIG2,.JB2,.eps] +(grfext) \AppendGraphicsExtensions on input line 456. + +(/usr/local/texlive/2016/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg +File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Liv +e +)) [1 {/usr/local/texlive/2016/texmf-var/fonts/map/pdftex/updmap/pdftex.map}] -[1] [1] -Underfull \hbox (badness 10000) in paragraph at lines 65--66 +[1] +File: deserts_plot.png Graphic file (type png) - [] + +Package pdftex.def Info: deserts_plot.png used on input line 37. +(pdftex.def) Requested size: 385.83467pt x 350.30255pt. + + +LaTeX Warning: `!h' float specifier changed to `!ht'. + + +File: pct_black_plot.png Graphic file (type png) + + +Package pdftex.def Info: pct_black_plot.png used on input line 45. 
+(pdftex.def) Requested size: 385.83467pt x 350.30255pt. + + +LaTeX Warning: `!h' float specifier changed to `!ht'. + + +File: pct_white_plot.png Graphic file (type png) -[2] + +Package pdftex.def Info: pct_white_plot.png used on input line 51. +(pdftex.def) Requested size: 385.83467pt x 350.30255pt. -LaTeX Warning: Float too large for page by 408.55894pt on input line 382. -[3] [4] [5] [6] (./paper.aux) ) +LaTeX Warning: `!h' float specifier changed to `!ht'. + + +File: income_plot.png Graphic file (type png) + +Package pdftex.def Info: income_plot.png used on input line 59. +(pdftex.def) Requested size: 385.83467pt x 350.30255pt. + + +LaTeX Warning: `!h' float specifier changed to `!ht'. + + +File: vacant_plot.png Graphic file (type png) + +Package pdftex.def Info: vacant_plot.png used on input line 67. +(pdftex.def) Requested size: 385.83467pt x 350.30255pt. + + +LaTeX Warning: `!h' float specifier changed to `!ht'. + +[1] +Underfull \hbox (badness 10000) in paragraph at lines 104--105 + + [] + +[2 <./deserts_plot.png>] [3 <./pct_black_plot.png>] [4 <./pct_white_plot.png>] +[5 <./income_plot.png>] [6 <./vacant_plot.png>] [7] [8] [9] (./paper.aux) ) Here is how much of TeX's memory you used: - 1383 strings out of 493014 - 17397 string characters out of 6133351 - 99672 words of memory out of 5000000 - 4962 multiletter control sequences out of 15000+600000 + 2630 strings out of 493014 + 36230 string characters out of 6133351 + 113228 words of memory out of 5000000 + 6147 multiletter control sequences out of 15000+600000 9369 words of font info for 34 fonts, out of 8000000 for 9000 1141 hyphenation exceptions out of 8191 - 27i,9n,32p,1165b,268s stack positions out of 5000i,500n,10000p,200000b,80000s - -Output written on paper.pdf (8 pages, 150024 bytes). + 37i,9n,32p,1165b,240s stack positions out of 5000i,500n,10000p,200000b,80000s + +Output written on paper.pdf (11 pages, 7826959 bytes). PDF statistics: - 79 PDF objects out of 1000 (max. 
8388607) - 56 compressed objects within 1 object stream + 99 PDF objects out of 1000 (max. 8388607) + 63 compressed objects within 1 object stream 0 named destinations out of 1000 (max. 500000) - 1 words of extra memory for PDF output out of 10000 (max. 10000000) + 26 words of extra memory for PDF output out of 10000 (max. 10000000) diff --git a/paper.pdf b/paper.pdf index c3789cc..b948dbc 100644 Binary files a/paper.pdf and b/paper.pdf differ diff --git a/paper.tex b/paper.tex index 7fc3b81..05a1681 100644 --- a/paper.tex +++ b/paper.tex @@ -1,8 +1,10 @@ \documentclass{report} \usepackage[margin=1in]{geometry} - \usepackage{amsmath} +\usepackage{graphicx} + + \title{Access to Food in Chicago: \\ a Hierarchical Perspective} \author{Daniel Berry} @@ -28,7 +30,44 @@ \subsection*{Food Deserts} % TODO: insert citation For this work we used the definition from %TODO: citation of defining a city block as being in a food desert if the city block is more than 1 mile from a supermarket. Supermarket in this context is a grocery store that is larger than 10000 square feet %TODO: citation -that is not primarily a liquor store. Distance is defined as the great circle distance from the center of mass of the city block to the center of mass of the grocery store. +that is not primarily a liquor store. Distance is defined as the great circle distance from the center of mass of the city block to the center of mass of the grocery store. + +We can see the location of food deserts as computed using our metric visualized in figure \ref{fig:desert}. +\begin{figure}[h!] + \includegraphics[scale=.2]{deserts_plot} + \caption{Food Desert Locations in Chicago} + \label{fig:desert} +\end{figure} + +Chicago is a very racially segregated city. As shown in figures \ref{fig:black} and \ref{fig:white}, many neighborhoods in Chicago are $>$75\% a single race. + +\begin{figure}[h!] 
+ \includegraphics[scale=.2]{pct_black_plot} + \caption{Neighborhoods by Black Percentage} + \label{fig:black} +\end{figure} + +\begin{figure}[h!] + \includegraphics[scale=.2]{pct_white_plot} + \caption{Neighborhoods by White Percentage} + \label{fig:white} +\end{figure} + +In fact, it appears that many food deserts are located in majority black neighborhoods. We will explore this relationship later on during the model building phase. Chicago also has very strong class divisions. For example the (near) north side is very wealthy while the south side and suburbs are less so, see \ref{fig:income}. + +\begin{figure}[h!] + \includegraphics[scale=.2]{income_plot} + \caption{Neighborhoods by Per Capita Income} + \label{fig:income} +\end{figure} + +Perhaps instead, food deserts are associated with urban decay. We might hypothesize that + +\begin{figure}[h!] + \includegraphics[scale=.2]{vacant_plot} + \caption{Neighborhoods by Number Vacant Buildings with 1 Mile Radius} + \label{fig:vacant} +\end{figure} \section*{Methods} @@ -40,8 +79,8 @@ \subsection*{Data Gathering and Manipulation} \subsubsection*{Block level data} -% \paragraph{ Crimes 2001 - present} -% This file contains a record for crimes in Chicago since 2001 with information about the type of crime as well as its location. The location is pseudo-anonymized to be random but within the same city block. For each city block we counted the total number of crimes committed within 1 mile in 2009. Our hypothesis \textit{a priori} was that food deserts were often located in high-crime areas. +\paragraph{ Crimes 2001 - present} +This file contains a record for crimes in Chicago since 2001 with information about the type of crime as well as its location. The location is pseudo-anonymized to be random but within the same city block. For each city block we counted the total number of crimes committed within 1 mile in 2009. Our hypothesis \textit{a priori} was that food deserts were often located in high-crime areas. 
\paragraph{ 311 Service Requests: Vacant Buildings} @@ -116,7 +155,7 @@ \subsubsection*{Hierarchical} \subsection*{Model Comparison} -Models were compared using AIC and cross validated Breir Score (Mean Square Error in the case of 2-class logistic regression). Lower values of AIC indicate better fitting models and thus can be used to compare the performance of models to each other. The cross validation was performed 10 fold where the model was fit on 80\% of the data and evaluated on the remaining 20\%. This gives a way of quantifying the predictive ability of the model on new, unseen data. +Models were compared using AIC and cross validated Brier Score (Mean Square Error in the case of 2-class logistic regression). Lower values of AIC indicate better fitting models and thus can be used to compare the performance of models to each other. The cross validation was performed 10 fold where the model was fit on 80\% of the data (sampled in a stratified manner based on Neighborhood) and evaluated on the remaining 20\%. This gives a way of quantifying the predictive ability of the model on new, unseen city blocks. 
\section*{Results} @@ -295,81 +334,82 @@ \subsubsection*{No Pooling} \hline \\[-1.8ex] CTA\_counts & $-$0.783$^{***}$ (0.061) \\ vacant\_counts & $-$0.408$^{***}$ (0.038) \\ - crime & 0.101$^{***}$ (0.023) \\ - NeighborhoodArcher Heights & $-$0.375 (1,446.209) \\ - NeighborhoodArmour Square & 17.931 (757.423) \\ - NeighborhoodAshburn & 18.317 (757.423) \\ - NeighborhoodAuburn Gresham & 14.142 (757.423) \\ - NeighborhoodAustin & 16.954 (757.423) \\ - NeighborhoodAvalon Park & 16.000 (757.423) \\ - NeighborhoodAvondale & 0.474 (1,120.121) \\ - NeighborhoodBelmont Cragin & $-$0.218 (950.607) \\ - NeighborhoodBeverly & 17.015 (757.423) \\ - NeighborhoodBridgeport & 16.189 (757.423) \\ - NeighborhoodBrighton Park & $-$0.312 (1,089.199) \\ - NeighborhoodBurnside & $-$0.026 (2,105.039) \\ - NeighborhoodCalumet Heights & 15.508 (757.423) \\ - NeighborhoodChatham & 17.625 (757.423) \\ - NeighborhoodChicago Lawn & 17.574 (757.423) \\ - NeighborhoodClearing & 20.174 (757.423) \\ - NeighborhoodDouglas & 0.258 (1,787.332) \\ - NeighborhoodDunning & 18.424 (757.423) \\ - NeighborhoodEast Side & $-$0.936 (1,160.353) \\ - NeighborhoodEdgewater & 0.313 (1,091.423) \\ - NeighborhoodEdison Park & 16.364 (757.423) \\ - NeighborhoodEnglewood & 19.671 (757.423) \\ - NeighborhoodForest Glen & 17.814 (757.423) \\ - NeighborhoodFuller Park & 18.528 (757.423) \\ - NeighborhoodGage Park & $-$0.006 (1,115.834) \\ - NeighborhoodGarfield Park & 20.438 (757.423) \\ - NeighborhoodGarfield Ridge & 18.394 (757.423) \\ - NeighborhoodGrand Boulevard & 18.242 (757.423) \\ - NeighborhoodGreater Grand Crossing & 1.179 (1,059.845) \\ - NeighborhoodHegewisch & 22.005 (757.423) \\ - NeighborhoodHermosa & $-$0.076 (1,317.960) \\ - NeighborhoodHumboldt Park & 14.370 (757.423) \\ - NeighborhoodHyde Park & 0.316 (1,584.758) \\ - NeighborhoodIrving Park & $-$0.052 (1,000.494) \\ - NeighborhoodJefferson Park & $-$0.612 (1,083.140) \\ - NeighborhoodKenwood & 0.162 (1,831.358) \\ - NeighborhoodLake View & 0.573 
(996.840) \\ - NeighborhoodLincoln Park & 0.284 (1,090.999) \\ - NeighborhoodLincoln Square & 0.152 (1,086.535) \\ - NeighborhoodLogan Square & 0.374 (928.612) \\ - NeighborhoodLoop & 4.593 (1,627.447) \\ - NeighborhoodLower West Side & $-$0.179 (1,141.349) \\ - NeighborhoodMcKinley Park & $-$0.425 (1,386.519) \\ - NeighborhoodMontclaire & $-$0.665 (1,428.421) \\ - NeighborhoodMorgan Park & 14.296 (757.423) \\ - NeighborhoodMount Greenwood & $-$1.035 (1,240.962) \\ - NeighborhoodNear North Side & 1.286 (1,065.410) \\ - NeighborhoodNear South Side & 0.203 (1,644.854) \\ - NeighborhoodNear West Side & 19.807 (757.423) \\ - NeighborhoodNew City & 17.533 (757.423) \\ - NeighborhoodNorth Center & 0.025 (1,072.777) \\ - NeighborhoodNorth Lawndale & 20.140 (757.423) \\ - NeighborhoodNorth Park & $-$0.485 (1,314.011) \\ - NeighborhoodNorwood Park & 15.859 (757.423) \\ - NeighborhoodO'Hare & 17.366 (757.423) \\ - NeighborhoodOakland & $-$0.333 (2,335.785) \\ - NeighborhoodPortage Park & 17.004 (757.423) \\ - NeighborhoodPullman & 20.484 (757.423) \\ - NeighborhoodRiverdale & $-$0.950 (2,642.673) \\ - NeighborhoodRogers Park & $-$0.180 (1,140.236) \\ - NeighborhoodRoseland & 21.135 (757.423) \\ - NeighborhoodSouth Chicago & 0.471 (1,066.274) \\ - NeighborhoodSouth Deering & 18.462 (757.423) \\ - NeighborhoodSouth Lawndale & $-$0.066 (1,038.034) \\ - NeighborhoodSouth Shore & 14.902 (757.423) \\ - NeighborhoodUptown & 0.845 (1,290.142) \\ - NeighborhoodWashington Heights & 17.343 (757.423) \\ - NeighborhoodWashington Park & 19.848 (757.423) \\ - NeighborhoodWest Elsdon & $-$0.357 (1,287.354) \\ - NeighborhoodWest Lawn & 18.031 (757.423) \\ - NeighborhoodWest Pullman & 20.427 (757.423) \\ - NeighborhoodWest Ridge & 18.428 (757.423) \\ - NeighborhoodWest Town & 0.566 (941.296) \\ - NeighborhoodWoodlawn & 17.508 (757.423) \\ + crime & 0.101$^{***}$ (0.023) \\ + \textit{Neighborhood intercepts omitted due to space} & \\ + % NeighborhoodArcher Heights & $-$0.375 (1,446.209) \\ + 
% NeighborhoodArmour Square & 17.931 (757.423) \\ + % NeighborhoodAshburn & 18.317 (757.423) \\ + % NeighborhoodAuburn Gresham & 14.142 (757.423) \\ + % NeighborhoodAustin & 16.954 (757.423) \\ + % NeighborhoodAvalon Park & 16.000 (757.423) \\ + % NeighborhoodAvondale & 0.474 (1,120.121) \\ + % NeighborhoodBelmont Cragin & $-$0.218 (950.607) \\ + % NeighborhoodBeverly & 17.015 (757.423) \\ + % NeighborhoodBridgeport & 16.189 (757.423) \\ + % NeighborhoodBrighton Park & $-$0.312 (1,089.199) \\ + % NeighborhoodBurnside & $-$0.026 (2,105.039) \\ + % NeighborhoodCalumet Heights & 15.508 (757.423) \\ + % NeighborhoodChatham & 17.625 (757.423) \\ + % NeighborhoodChicago Lawn & 17.574 (757.423) \\ + % NeighborhoodClearing & 20.174 (757.423) \\ + % NeighborhoodDouglas & 0.258 (1,787.332) \\ + % NeighborhoodDunning & 18.424 (757.423) \\ + % NeighborhoodEast Side & $-$0.936 (1,160.353) \\ + % NeighborhoodEdgewater & 0.313 (1,091.423) \\ + % NeighborhoodEdison Park & 16.364 (757.423) \\ + % NeighborhoodEnglewood & 19.671 (757.423) \\ + % NeighborhoodForest Glen & 17.814 (757.423) \\ + % NeighborhoodFuller Park & 18.528 (757.423) \\ + % NeighborhoodGage Park & $-$0.006 (1,115.834) \\ + % NeighborhoodGarfield Park & 20.438 (757.423) \\ + % NeighborhoodGarfield Ridge & 18.394 (757.423) \\ + % NeighborhoodGrand Boulevard & 18.242 (757.423) \\ + % NeighborhoodGreater Grand Crossing & 1.179 (1,059.845) \\ + % NeighborhoodHegewisch & 22.005 (757.423) \\ + % NeighborhoodHermosa & $-$0.076 (1,317.960) \\ + % NeighborhoodHumboldt Park & 14.370 (757.423) \\ + % NeighborhoodHyde Park & 0.316 (1,584.758) \\ + % NeighborhoodIrving Park & $-$0.052 (1,000.494) \\ + % NeighborhoodJefferson Park & $-$0.612 (1,083.140) \\ + % NeighborhoodKenwood & 0.162 (1,831.358) \\ + % NeighborhoodLake View & 0.573 (996.840) \\ + % NeighborhoodLincoln Park & 0.284 (1,090.999) \\ + % NeighborhoodLincoln Square & 0.152 (1,086.535) \\ + % NeighborhoodLogan Square & 0.374 (928.612) \\ + % NeighborhoodLoop & 
4.593 (1,627.447) \\ + % NeighborhoodLower West Side & $-$0.179 (1,141.349) \\ + % NeighborhoodMcKinley Park & $-$0.425 (1,386.519) \\ + % NeighborhoodMontclaire & $-$0.665 (1,428.421) \\ + % NeighborhoodMorgan Park & 14.296 (757.423) \\ + % NeighborhoodMount Greenwood & $-$1.035 (1,240.962) \\ + % NeighborhoodNear North Side & 1.286 (1,065.410) \\ + % NeighborhoodNear South Side & 0.203 (1,644.854) \\ + % NeighborhoodNear West Side & 19.807 (757.423) \\ + % NeighborhoodNew City & 17.533 (757.423) \\ + % NeighborhoodNorth Center & 0.025 (1,072.777) \\ + % NeighborhoodNorth Lawndale & 20.140 (757.423) \\ + % NeighborhoodNorth Park & $-$0.485 (1,314.011) \\ + % NeighborhoodNorwood Park & 15.859 (757.423) \\ + % NeighborhoodO'Hare & 17.366 (757.423) \\ + % NeighborhoodOakland & $-$0.333 (2,335.785) \\ + % NeighborhoodPortage Park & 17.004 (757.423) \\ + % NeighborhoodPullman & 20.484 (757.423) \\ + % NeighborhoodRiverdale & $-$0.950 (2,642.673) \\ + % NeighborhoodRogers Park & $-$0.180 (1,140.236) \\ + % NeighborhoodRoseland & 21.135 (757.423) \\ + % NeighborhoodSouth Chicago & 0.471 (1,066.274) \\ + % NeighborhoodSouth Deering & 18.462 (757.423) \\ + % NeighborhoodSouth Lawndale & $-$0.066 (1,038.034) \\ + % NeighborhoodSouth Shore & 14.902 (757.423) \\ + % NeighborhoodUptown & 0.845 (1,290.142) \\ + % NeighborhoodWashington Heights & 17.343 (757.423) \\ + % NeighborhoodWashington Park & 19.848 (757.423) \\ + % NeighborhoodWest Elsdon & $-$0.357 (1,287.354) \\ + % NeighborhoodWest Lawn & 18.031 (757.423) \\ + % NeighborhoodWest Pullman & 20.427 (757.423) \\ + % NeighborhoodWest Ridge & 18.428 (757.423) \\ + % NeighborhoodWest Town & 0.566 (941.296) \\ + % NeighborhoodWoodlawn & 17.508 (757.423) \\ Constant & $-$20.678 (757.423) \\ \hline \\[-1.8ex] Observations & 36,870 \\ @@ -441,11 +481,42 @@ \subsubsection*{Partial Pooling} \subsubsection*{Hierarchical} +% Table created by stargazer v.5.2 by Marek Hlavac, Harvard University. 
E-mail: hlavac at fas.harvard.edu +% Date and time: Tue, Nov 22, 2016 - 15:04:24 +\begin{table}[!htbp] \centering + \caption{Hierarchical Model Summary} + \label{mlm} +\begin{tabular}{@{\extracolsep{5pt}}lc} +\\[-1.8ex]\hline +\hline \\[-1.8ex] + & \multicolumn{1}{c}{\textit{Dependent variable:}} \\ +\cline{2-2} +\\[-1.8ex] & desert \\ +\hline \\[-1.8ex] + CTA\_counts & $-$0.779$^{***}$ (0.061) \\ + crime & 0.102$^{***}$ (0.023) \\ + vacant\_counts & $-$0.413$^{***}$ (0.038) \\ + Cancer..All.Sites. & 3.289$^{***}$ (0.852) \\ + Diabetes.related & $-$1.954$^{**}$ (0.801) \\ + Dependency & 1.158$^{*}$ (0.695) \\ + TOTAL.POPULATION & $-$0.147$^{***}$ (0.048) \\ + Constant & $-$6.221$^{***}$ (0.522) \\ +\hline \\[-1.8ex] +Observations & 36,870 \\ +Log Likelihood & $-$6,540.603 \\ +Akaike Inf. Crit. & 13,099.210 \\ +Bayesian Inf. Crit. & 13,175.840 \\ +\hline +\hline \\[-1.8ex] +\textit{Note:} & \multicolumn{1}{r}{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01} \\ +\end{tabular} +\end{table} + \subsection*{Model Comparison} -We can see from table \ref{AICs} that the No Pooling model has the lowest AIC which is to be expected as in a certain sense this model has the most flexibility. The intercept term for each neighborhood is the average of only the observations in that neighborhood and is not ``shrunk'' to any sort of common mean. +We can see from table \ref{AICs} that the No Pooling model has the lowest AIC which is to be expected as in a certain sense this model has the most flexibility. The intercept term for each neighborhood is the average of only the observations in that neighborhood and is not ``shrunk'' to any sort of common mean. The intercepts for the hierarchical model are shrunk due to the effect of the neighborhood level regressors. -As we can see from table \ref{MSEs}, %TODO: write section on which MSEs are the smallest. +As we can see from table \ref{MSEs}, the Hierarchical model outperforms the other 3 models in terms of held out predictive ability. 
\begin{table}[!htbp] \centering \caption{Model AICs} @@ -453,10 +524,10 @@ \subsection*{Model Comparison} \begin{tabular}[c]{c|c} \\ Model & AIC \\ \hline \\ - Complete Pooling & 0 \\ - No Pooling & 0 \\ - Partial Pooling & 0 \\ - Hierarchical & 0 \\ + Complete Pooling & 19826.7 \\ + No Pooling & 12970.3 \\ + Partial Pooling & 13139.4 \\ + Hierarchical & 13099.2 \\ \end{tabular} \end{table} @@ -464,17 +535,25 @@ \subsection*{Model Comparison} \caption{Model Cross Validated MSEs} \label{MSEs} \begin{tabular}[c]{c|c} -\\ Model & AIC \\ +\\ Model & MSE \\ \hline \\ - Complete Pooling & 0 \\ - No Pooling & 0 \\ - Partial Pooling & 0 \\ - Hierarchical & 0 \\ + Complete Pooling & 0.07382216 \\ + No Pooling & 0.05328587 \\ + Partial Pooling & 0.05329956 \\ + Hierarchical & 0.05323632 \\ \end{tabular} \end{table} \section*{Conclusions} +In terms of cross-validated accuracy, the hierarchical model was more accurate on average on new city blocks than the other 3 models, indicating support for the hierarchical structure of the data. However, the evidence was not as strong as the author would have liked. Consider the model summary in table \ref{mlm}. We see that food deserts tend to be located in neighborhoods with higher incidences of all-site cancer. Perhaps surprisingly, in the presence of the other information, a block in a neighborhood with higher incidences of diabetes was less likely to be in a food desert. City blocks in neighborhoods that are more populous (TOTAL.POPULATION) are less likely to be food deserts. Finally, blocks in neighborhoods with higher rates of dependency (\% of the population younger than 18 or older than 64) are more likely to be in food deserts. + +While we have evidence for the utility of multi-level data for modeling food desert presence in Chicago, the knowledge in this report is likely common knowledge for anyone working in this field. 
+ +Public health variables tended to be more predictive than purely racial variables, although there is a strong correlation between race and health in Chicago. + +\section*{Future Work} +Some issues due to not having data from grocery stores outside the city limits, could affect food desert status of city blocks near the borders. \end{document} \ No newline at end of file diff --git a/plots.r b/plots.r index 97f17aa..9c41135 100644 --- a/plots.r +++ b/plots.r @@ -111,15 +111,92 @@ for (covar in potential_covariates) { all_data$desert_logical <- all_data$desert == 1 -ggplot(all_data, aes(x = Longitude_t, y = Latitude_t, color = desert_logical)) + +deserts_plot <- ggplot(all_data, aes(x = Longitude_t, y = Latitude_t, color = desert_logical)) + geom_point(alpha = .1) + theme_bw() + scale_color_manual(values = c('grey', 'black')) + - geom_path(data = nbhd_df, aes(long, lat, group = id, color = NULL)) + geom_path(data = nbhd_df, aes(long, lat, group = id, color = NULL)) + + labs(x = NULL, y = NULL, title = 'Food Desert Locations in Chicago') + + guides(color = guide_legend(title = 'Food Desert Status')) + + theme(axis.ticks.x = element_blank(), + axis.text.x=element_blank(), + axis.ticks.y = element_blank(), + axis.text.y=element_blank()) + +ggsave('deserts_plot.png', deserts_plot) -ggplot(all_data, aes(x = Longitude_t, y = Latitude_t, color = NHB_p)) + + +pct_black_plot <- ggplot(all_data, aes(x = Longitude_t, y = Latitude_t, color = NHB_p)) + + geom_point(alpha = .1) + + theme_bw() + + geom_path(data = nbhd_df, aes(long, lat, group = id, color = NULL)) + + geom_path(data = nbhd_df, aes(long, lat, group = id, color = NULL)) + + labs(x = NULL, y = NULL, title = 'Racial Segregation: % Black') + + guides(color = guide_legend(title = 'Percent Black')) + + theme(axis.ticks.x = element_blank(), + axis.text.x=element_blank(), + axis.ticks.y = element_blank(), + axis.text.y=element_blank()) +ggsave('pct_black_plot.png', pct_black_plot) + +pct_white_plot <- ggplot(all_data, 
aes(x = Longitude_t, y = Latitude_t, color = NHW_p)) + geom_point(alpha = .1) + theme_bw() + geom_path(data = nbhd_df, aes(long, lat, group = id, color = NULL)) + - labs() - + geom_path(data = nbhd_df, aes(long, lat, group = id, color = NULL)) + + labs(x = NULL, y = NULL, title = 'Racial Segregation: % White') + + guides(color = guide_legend(title = 'Percent White')) + + theme(axis.ticks.x = element_blank(), + axis.text.x=element_blank(), + axis.ticks.y = element_blank(), + axis.text.y=element_blank()) + +ggsave('pct_white_plot.png', pct_white_plot) + + +crime_plot <- ggplot(all_data, aes(x = Longitude_t, y = Latitude_t, color = crime)) + + geom_point(alpha = .1) + + theme_bw() + + geom_path(data = nbhd_df, aes(long, lat, group = id, color = NULL)) + + geom_path(data = nbhd_df, aes(long, lat, group = id, color = NULL)) + + labs(x = NULL, y = NULL, title = 'Total Crime Within 1 Mile') + + guides(color = guide_legend(title = 'Total Crime')) + + theme(axis.ticks.x = element_blank(), + axis.text.x=element_blank(), + axis.ticks.y = element_blank(), + axis.text.y=element_blank()) + +ggsave('crime_plot.png', crime_plot) +crime_plot + +income_plot <- ggplot(all_data, aes(x = Longitude_t, y = Latitude_t, color = PER.CAPITA.INCOME)) + + geom_point(alpha = .1) + + theme_bw() + + geom_path(data = nbhd_df, aes(long, lat, group = id, color = NULL)) + + geom_path(data = nbhd_df, aes(long, lat, group = id, color = NULL)) + + labs(x = NULL, y = NULL, title = 'Per Capita Income') + + guides(color = guide_legend(title = 'Income')) + + theme(axis.ticks.x = element_blank(), + axis.text.x=element_blank(), + axis.ticks.y = element_blank(), + axis.text.y=element_blank()) + +ggsave('income_plot.png', income_plot) +income_plot + +vacant_plot <- ggplot(all_data, aes(x = Longitude_t, y = Latitude_t, color = vacant_counts)) + + geom_point(alpha = .1) + + theme_bw() + + geom_path(data = nbhd_df, aes(long, lat, group = id, color = NULL)) + + geom_path(data = nbhd_df, aes(long, lat, group = id, 
color = NULL)) + + labs(x = NULL, y = NULL, title = 'Vacant Buildings within 1 Mile') + + guides(color = guide_legend(title = 'Vacancy')) + + theme(axis.ticks.x = element_blank(), + axis.text.x=element_blank(), + axis.ticks.y = element_blank(), + axis.text.y=element_blank()) + +ggsave('vacant_plot.png', vacant_plot) +vacant_plot + +pct_white_plot