From c89ce18e046e549f746ba6b0d80533d7ffd7a08a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koles=C3=A1r?= Date: Fri, 15 Jul 2022 14:53:35 -0400 Subject: [PATCH] Effective number of observations for RD_OPT --- .gitignore | 1 - R/RD_opt.R | 7 ++--- inst/WORDLIST | 54 +++++++++++++++++++++++++++++++++++++++ tests/spelling.R | 2 +- tests/testthat/test_lpp.R | 20 +++++++++++++-- tests/testthat/test_rd.R | 11 ++++++++ vignettes/RDHonest.Rmd | 2 +- 7 files changed, 89 insertions(+), 8 deletions(-) create mode 100644 inst/WORDLIST diff --git a/.gitignore b/.gitignore index 3896ab9..bc28a26 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ -inst/ *.el notes.org /Meta/ diff --git a/R/RD_opt.R b/R/RD_opt.R index 8a485ea..e62967e 100644 --- a/R/RD_opt.R +++ b/R/RD_opt.R @@ -78,12 +78,13 @@ RDTEstimator <- function(d, f, alpha, se.method, J) { upper <- Lhat + maxbias + stats::qnorm(1-alpha)*sd hl <- CVb(maxbias/sd, alpha) * sd # Half-length - ## Effective number of observations: TODO - ## eff.obs <- 1/sum(Wp^2) + 1/sum(Wm^2) + r.u <- NPRreg.fit(d, max(abs(d$X[W!=0])), kern="uniform") + eff.obs <- r.u$eff.obs*sum(r.u$w^2)/sum(W^2) + coef <- data.frame(term="Sharp RD parameter", estimate=Lhat, std.error=sd, maximum.bias=maxbias, conf.low=Lhat-hl, conf.high=Lhat+hl, conf.low.onesided=lower, - conf.high.onesided=upper, eff.obs=NA, #TODO + conf.high.onesided=upper, eff.obs=eff.obs, cv=CVb(maxbias/sd, alpha), alpha=alpha, method="Taylor") structure(list(coefficients=coef, data=d, delta=sqrt(4*q), omega=2*b), diff --git a/inst/WORDLIST b/inst/WORDLIST new file mode 100644 index 0000000..8ce2fdb --- /dev/null +++ b/inst/WORDLIST @@ -0,0 +1,54 @@ +BME +Battistin +Brugiavini +CMD +CRC +Christoph +Econometrica +Eicker +Epanechnikov +Gijbels +Guglielmo +Hölder +Imbens +Irène +Jens +Jianqing +Kalyanaraman +Karthik +Lalive +Lalive's +Lindeberg +Modelling +Optimality +Oreopoulos +REBP +Rettore +Rothe +SES +Silverman's +cdot +coercible +cv +dotsc +downarrow +estimand +frac +geq +infty +leq +lim +mathbb +mathcal +misspecification +nn +optimality +overset +pmatrix +preprint +priori +qquad +se +th +uparrow +widehat diff --git a/tests/spelling.R b/tests/spelling.R index c61948b..6713838 100644 --- a/tests/spelling.R +++ b/tests/spelling.R @@ -1,3 +1,3 @@ -if(requireNamespace("spelling", quietly = TRUE)) +if(requireNamespace('spelling', quietly = TRUE)) spelling::spell_check_test(vignettes = TRUE, error = FALSE, skip_on_cran = TRUE) diff --git a/tests/testthat/test_lpp.R b/tests/testthat/test_lpp.R index 91d8ac7..8e4f7b1 100644 --- a/tests/testthat/test_lpp.R +++ b/tests/testthat/test_lpp.R @@ -11,8 +11,8 @@ test_that("Inference at point agrees with RD", { expect_equal(pp$estimate-mm$estimate, rde$estimate) expect_equal(pp$std.error^2+mm$std.error^2, rde$std.error^2) expect_equal(pp$maximum.bias+mm$maximum.bias, rde$maximum.bias) - ## TODO - ## expect_equal(mm$eff.obs+pp$eff.obs, rde$eff.obs) + + p2 <- RDHonest(voteshare~margin, data=lee08, subset=margin>=0, h=5, M=2, point.inference=TRUE) @@ -70,8 +70,24 @@ test_that("Optimal bandwidth calculations", { kern="uniform", opt.criterion="FLCI", point.inference=TRUE) expect_equal(rr$coefficients$conf.high.onesided, 55.24963853) + expect_equal(rr$coefficients$eff.obs, 858) + expect_equal(rr$coefficients$eff.obs, + sum(lee08$margin<=rr$coefficients$bandwidth & lee08$margin>0)) dp <- NPRData(lee08[lee08$margin>0, ], cutoff=0, "IP") Mh <- rr$coefficients$M + + re <- RDHonest(voteshare ~ margin, data=lee08, subset=(margin>0), + kern="uniform", opt.criterion="FLCI", point.inference=TRUE, + se.method="EHW") + ## Should match regression + rl <- lm(voteshare ~ margin, data=lee08, + subset=margin>0 & margin<=rr$coefficients$bandwidth) + XX <- model.matrix(rl) + meat <- crossprod(XX, rl$residuals^2*XX) + vl <- (solve(crossprod(XX)) %*% meat %*% solve(crossprod(XX)))[1, 1] + expect_equal(sqrt(vl), re$coefficients$std.error) + expect_equal(unname(rl$coefficients[1]), rr$coefficients$estimate) + r1 <- RDHonest(voteshare~margin, data=lee08, subset=margin>0, M=2*Mh, opt.criterion="MSE", point.inference=TRUE) r <- capture.output(print(r1, digits=4)) diff --git a/tests/testthat/test_rd.R b/tests/testthat/test_rd.R index 7d2c752..9083d25 100644 --- a/tests/testthat/test_rd.R +++ b/tests/testthat/test_rd.R @@ -70,6 +70,17 @@ test_that("Honest inference in Lee and LM data", { kern="uniform", h=18, M=0.1, se.method="EHW") r2 <- RDHonest(mortHS ~ povrate60, data=headst, kern="uniform", h=18, M=0.1, se.method="nn") + ## Should match regression + rl <- lm(mortHS ~ povrate60*I(povrate60>=0), data=headst, + subset=abs(povrate60)<=18) + XX <- model.matrix(rl) + meat <- crossprod(XX, rl$residuals^2*XX) + vl <- (solve(crossprod(XX)) %*% meat %*% solve(crossprod(XX)))[3, 3] + expect_equal(sqrt(vl), r1$coefficients$std.error) + expect_equal(unname(rl$coefficients[3]), r1$coefficients$estimate) + + expect_equal(r1$coefficients$eff.obs, 954) + expect_equal(954, sum(abs(r1$d$X)<=18)) r1o <- capture.output(print(r1, digits=4)) r2o <- capture.output(print(r2, digits=4)) expect_equal(r1$coefficients$estimate, -1.1982581306) diff --git a/vignettes/RDHonest.Rmd b/vignettes/RDHonest.Rmd index 9a63e96..ccbb617 100644 --- a/vignettes/RDHonest.Rmd +++ b/vignettes/RDHonest.Rmd @@ -536,7 +536,7 @@ RDHonest(voteshare ~ margin, data=lee08, subset=margin>0, In fuzzy RD, by Theorem B.2 in the appendix to @ArKo16honest, the estimator is asymptotically equivalent to \begin{equation*} -\sum_i w_i (Y_i-D_i\beta)/fs, +\sum_i w_i (Y_i-D_i\beta)/first stage, \end{equation*} so the Lindeberg weights are the same as in the sharp case.