Add covariates to Battistin et al

kolesarm · May 7, 2024 · bd6e1ea · bd6e1ea
1 parent cf4d3ae
commit bd6e1ea
Show file tree

Hide file tree

Showing 9 changed files with 79 additions and 13 deletions.
diff --git a/R/checks.R b/R/checks.R
@@ -23,6 +23,10 @@ process_options <- function(M, se.method, method, d, kern) {
             stop(paste0("M must be a non-negative numeric vector of length",
                         m_len, "."))
     }
+
+    if (min(sum(d$p), sum(d$m))==0) {
+        stop("No observations on one side of the cutoff")
+    }
     if (!(se.method %in% c("nn", "EHW", "supplied.var"))) {
         stop("Unsupported se.method")
     }

diff --git a/R/documentation.R b/R/documentation.R
@@ -120,6 +120,10 @@
 #'   \item{food}{Total household food expenditure}
 #'   \item{c}{Total household consumption}
 #'   \item{cn}{Total household expenditure on non-durable goods}
+#'   \item{education}{Educational attainment (males), one of: "none",
+#'         "elementary school", "lower secondary", "vocational studies",
+#'         "upper secondary", "college or higher"}
+#'   \item{family_size}{Family size}
 #' }
 #'
 #' @references{

diff --git a/data-raw/data-prep.R b/data-raw/data-prep.R
@@ -155,22 +155,17 @@ headst <- headst[, c(1:6, 10, 9, 11:16, 7:8, 17:18)]
 usethis::use_data(headst, overwrite=TRUE, internal=FALSE)
 
 ## 4. Battistin et al data from AER website
+## https://doi.org/10.1257/aer.99.5.2209
 dir5 <- "~/teaching/Datasets/BattistinEtAl2009/datapaper_ab.dta"
 rcp <- readstata13::read.dta13(dir5, generate.factors=TRUE, nonint.factors=TRUE)
-rcp <- rcp[, c(2, 29, 27, 8, 4, 6)]
+rcp <- rcp[, c("anno", "esse_m", "qu_m", "jconsal", "c", "cn", "educ_m",
+               "ncomp")]
+## 30,703 obs, as in Step 2 in Table 3.
+
 ## Survey year
-names(rcp) <- c("survey_year", "elig_year", "retired", "food", "c", "cn")
+names(rcp) <- c("survey_year", "elig_year", "retired", "food", "c", "cn",
+                "education", "family_size")
 rcp$retired <- rcp$retired == "retired"
-## drop if at
+## Drop observations with elig_year equal to 0, since eligibility is unclear there.
 rcp <- rcp[rcp$elig_year!=0, ]
 usethis::use_data(rcp, overwrite=TRUE, internal=FALSE)
-
-## // profiles by S: collapse (mean) mc=lnc mcn=lncn mf=lncf ret=retired
-## hh=hh_size age=age minors=minors children=children mcfp=lncfp, by(time anno)
-
-## generate elig = time>=0
-## keep if abs(time)<=10 & time!=0
-
-## IV regressions with year dummies: Table 5
-## ivregress 2sls mcn (ret=elig) time c.time#c.time i.anno, robust first
-## ivregress 2sls mf (ret=elig) time c.time#c.time i.anno, robust
diff --git a/data/rcp.rda b/data/rcp.rda
diff --git a/doc/RDHonest.R b/doc/RDHonest.R
@@ -81,6 +81,19 @@ ci_len <- c(rc$coefficients$conf.high-rc$coefficients$conf.low,
             rn$coefficients$conf.high-rn$coefficients$conf.low)
 100 * (1 - ci_len[1]/ci_len[2])
 
+## -----------------------------------------------------------------------------
+RDHonest(log(cn) | retired ~ elig_year | education, data=rcp,
+         T0=r$coefficients$estimate)
+
+## ----fig.width=4.5, fig.height=3.5, fig.cap="Battistin et al (2009) data"-----
+## see Figure 3
+f3 <- RDScatter(log(cn)~elig_year, data=rcp, cutoff=0, avg=Inf,
+                xlab="Years to eligibility",
+                ylab="Log consumption of non-durables", propdotsize=TRUE,
+                subset=abs(elig_year)<10)
+## Adjust size of dots if they are too big
+f3 + ggplot2::scale_size_area(max_size = 5)
+
 ## -----------------------------------------------------------------------------
 dd <- data.frame()
 ## Collapse data by running variable

diff --git a/doc/RDHonest.Rmd b/doc/RDHonest.Rmd
@@ -717,6 +717,29 @@ $\tilde{\theta}_{h}=\tilde{\tau}_{Y, h}/\tilde{\tau}_{D, h}$, with variances and
 worst-case bias computed as in the case without covariates, replacing the
 treatment and outcome with their covariate-adjusted versions.
 
+A demonstration using the `rcp` data, where we add education controls:
+```{r}
+RDHonest(log(cn) | retired ~ elig_year | education, data=rcp,
+         T0=r$coefficients$estimate)
+```
+
+Relative to the previous estimate without covariates, the point estimate is now
+much larger. This is partly due to the slightly smaller bandwidth used, and
+partly because the regression function for the reduced form appears noisy below
+the cutoff, potentially due to measurement error: see Figure 3. The noise is
+also responsible for the rather large data-driven estimates of the curvature
+parameters.
+
+```{r, fig.width=4.5, fig.height=3.5, fig.cap="Battistin et al (2009) data"}
+## see Figure 3
+f3 <- RDScatter(log(cn)~elig_year, data=rcp, cutoff=0, avg=Inf,
+                xlab="Years to eligibility",
+                ylab="Log consumption of non-durables", propdotsize=TRUE,
+                subset=abs(elig_year)<10)
+## Adjust size of dots if they are too big
+f3 + ggplot2::scale_size_area(max_size = 5)
+```
+
 
 ## Aggregated data and weighted regression
 

diff --git a/doc/RDHonest.pdf b/doc/RDHonest.pdf
diff --git a/man/rcp.Rd b/man/rcp.Rd
diff --git a/vignettes/RDHonest.Rmd b/vignettes/RDHonest.Rmd
@@ -717,6 +717,29 @@ $\tilde{\theta}_{h}=\tilde{\tau}_{Y, h}/\tilde{\tau}_{D, h}$, with variances and
 worst-case bias computed as in the case without covariates, replacing the
 treatment and outcome with their covariate-adjusted versions.
 
+A demonstration using the `rcp` data, where we add education controls:
+```{r}
+RDHonest(log(cn) | retired ~ elig_year | education, data=rcp,
+         T0=r$coefficients$estimate)
+```
+
+Relative to the previous estimate without covariates, the point estimate is now
+much larger. This is partly due to the slightly smaller bandwidth used, and
+partly because the regression function for the reduced form appears noisy below
+the cutoff, potentially due to measurement error: see Figure 3. As a result, the
+estimates are quite sensitive to the bandwidth used. The noise is also
+responsible for the rather large data-driven estimates of the curvature
+parameters.
+
+```{r, fig.width=4.5, fig.height=3.5, fig.cap="Battistin et al (2009) data"}
+## see Figure 3
+f3 <- RDScatter(log(cn)~elig_year, data=rcp, cutoff=0, avg=Inf,
+                xlab="Years to eligibility",
+                ylab="Log consumption of non-durables", propdotsize=TRUE,
+                subset=abs(elig_year)<15)
+## Adjust size of dots if they are too big
+f3 + ggplot2::scale_size_area(max_size = 5)
+```
+
 
 ## Aggregated data and weighted regression