From c89ce18e046e549f746ba6b0d80533d7ffd7a08a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Koles=C3=A1r?= <kolesarmi@googlemail.com>
Date: Fri, 15 Jul 2022 14:53:35 -0400
Subject: [PATCH] Effective number of observations for RD_OPT

---
 .gitignore                |  1 -
 R/RD_opt.R                |  7 ++---
 inst/WORDLIST             | 54 +++++++++++++++++++++++++++++++++++++++
 tests/spelling.R          |  2 +-
 tests/testthat/test_lpp.R | 20 +++++++++++++--
 tests/testthat/test_rd.R  | 11 ++++++++
 vignettes/RDHonest.Rmd    |  2 +-
 7 files changed, 89 insertions(+), 8 deletions(-)
 create mode 100644 inst/WORDLIST

diff --git a/.gitignore b/.gitignore
index 3896ab9..bc28a26 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,3 @@
-inst/
 *.el
 notes.org
 /Meta/
diff --git a/R/RD_opt.R b/R/RD_opt.R
index 8a485ea..e62967e 100644
--- a/R/RD_opt.R
+++ b/R/RD_opt.R
@@ -78,12 +78,13 @@ RDTEstimator <- function(d, f, alpha, se.method, J) {
     upper <- Lhat + maxbias + stats::qnorm(1-alpha)*sd
     hl <- CVb(maxbias/sd, alpha) * sd # Half-length
 
-    ## Effective number of observations: TODO
-    ## eff.obs <- 1/sum(Wp^2) + 1/sum(Wm^2)
+    r.u <- NPRreg.fit(d, max(abs(d$X[W!=0])), kern="uniform")
+    eff.obs <- r.u$eff.obs*sum(r.u$w^2)/sum(W^2)
+
     coef <- data.frame(term="Sharp RD parameter", estimate=Lhat, std.error=sd,
                        maximum.bias=maxbias, conf.low=Lhat-hl,
                        conf.high=Lhat+hl, conf.low.onesided=lower,
-                       conf.high.onesided=upper, eff.obs=NA, #TODO
+                       conf.high.onesided=upper, eff.obs=eff.obs,
                        cv=CVb(maxbias/sd, alpha), alpha=alpha, method="Taylor")
 
     structure(list(coefficients=coef, data=d, delta=sqrt(4*q), omega=2*b),
diff --git a/inst/WORDLIST b/inst/WORDLIST
new file mode 100644
index 0000000..8ce2fdb
--- /dev/null
+++ b/inst/WORDLIST
@@ -0,0 +1,54 @@
+BME
+Battistin
+Brugiavini
+CMD
+CRC
+Christoph
+Econometrica
+Eicker
+Epanechnikov
+Gijbels
+Guglielmo
+Hölder
+Imbens
+Irène
+Jens
+Jianqing
+Kalyanaraman
+Karthik
+Lalive
+Lalive's
+Lindeberg
+Modelling
+Optimality
+Oreopoulos
+REBP
+Rettore
+Rothe
+SES
+Silverman's
+cdot
+coercible
+cv
+dotsc
+downarrow
+estimand
+frac
+geq
+infty
+leq
+lim
+mathbb
+mathcal
+misspecification
+nn
+optimality
+overset
+pmatrix
+preprint
+priori
+qquad
+se
+th
+uparrow
+widehat
diff --git a/tests/spelling.R b/tests/spelling.R
index c61948b..6713838 100644
--- a/tests/spelling.R
+++ b/tests/spelling.R
@@ -1,3 +1,3 @@
-if(requireNamespace("spelling", quietly = TRUE))
+if(requireNamespace('spelling', quietly = TRUE))
   spelling::spell_check_test(vignettes = TRUE, error = FALSE,
                              skip_on_cran = TRUE)
diff --git a/tests/testthat/test_lpp.R b/tests/testthat/test_lpp.R
index 91d8ac7..8e4f7b1 100644
--- a/tests/testthat/test_lpp.R
+++ b/tests/testthat/test_lpp.R
@@ -11,8 +11,8 @@ test_that("Inference at point agrees with RD", {
     expect_equal(pp$estimate-mm$estimate, rde$estimate)
     expect_equal(pp$std.error^2+mm$std.error^2, rde$std.error^2)
     expect_equal(pp$maximum.bias+mm$maximum.bias, rde$maximum.bias)
-    ## TODO
-    ## expect_equal(mm$eff.obs+pp$eff.obs, rde$eff.obs)
+
+
 
     p2 <- RDHonest(voteshare~margin, data=lee08, subset=margin>=0, h=5, M=2,
                    point.inference=TRUE)
@@ -70,8 +70,24 @@ test_that("Optimal bandwidth calculations", {
                    kern="uniform", opt.criterion="FLCI", point.inference=TRUE)
     expect_equal(rr$coefficients$conf.high.onesided,
                  55.24963853)
+    expect_equal(rr$coefficients$eff.obs, 858)
+    expect_equal(rr$coefficients$eff.obs,
+                 sum(lee08$margin<=rr$coefficients$bandwidth & lee08$margin>0))
     dp <- NPRData(lee08[lee08$margin>0, ], cutoff=0, "IP")
     Mh <- rr$coefficients$M
+
+    re <- RDHonest(voteshare ~ margin, data=lee08, subset=(margin>0),
+                   kern="uniform", opt.criterion="FLCI", point.inference=TRUE,
+                   se.method="EHW")
+    ## Should match regression
+    rl <- lm(voteshare ~ margin, data=lee08,
+             subset=margin>0 & margin<=rr$coefficients$bandwidth)
+    XX <- model.matrix(rl)
+    meat <- crossprod(XX, rl$residuals^2*XX)
+    vl <- (solve(crossprod(XX)) %*% meat %*% solve(crossprod(XX)))[1, 1]
+    expect_equal(sqrt(vl), re$coefficients$std.error)
+    expect_equal(unname(rl$coefficients[1]), rr$coefficients$estimate)
+
     r1 <- RDHonest(voteshare~margin, data=lee08, subset=margin>0, M=2*Mh,
                    opt.criterion="MSE", point.inference=TRUE)
     r <- capture.output(print(r1, digits=4))
diff --git a/tests/testthat/test_rd.R b/tests/testthat/test_rd.R
index 7d2c752..9083d25 100644
--- a/tests/testthat/test_rd.R
+++ b/tests/testthat/test_rd.R
@@ -70,6 +70,17 @@ test_that("Honest inference in Lee and LM data",  {
                   kern="uniform", h=18, M=0.1, se.method="EHW")
     r2 <- RDHonest(mortHS ~ povrate60, data=headst,
                   kern="uniform", h=18, M=0.1, se.method="nn")
+    ## Should match regression
+    rl <- lm(mortHS ~ povrate60*I(povrate60>=0), data=headst,
+             subset=abs(povrate60)<=18)
+    XX <- model.matrix(rl)
+    meat <- crossprod(XX, rl$residuals^2*XX)
+    vl <- (solve(crossprod(XX)) %*% meat %*% solve(crossprod(XX)))[3, 3]
+    expect_equal(sqrt(vl), r1$coefficients$std.error)
+    expect_equal(unname(rl$coefficients[3]), r1$coefficients$estimate)
+
+    expect_equal(r1$coefficients$eff.obs, 954)
+    expect_equal(954, sum(abs(r1$d$X)<=18))
     r1o <- capture.output(print(r1, digits=4))
     r2o <- capture.output(print(r2, digits=4))
     expect_equal(r1$coefficients$estimate, -1.1982581306)
diff --git a/vignettes/RDHonest.Rmd b/vignettes/RDHonest.Rmd
index 9a63e96..ccbb617 100644
--- a/vignettes/RDHonest.Rmd
+++ b/vignettes/RDHonest.Rmd
@@ -536,7 +536,7 @@ RDHonest(voteshare ~ margin, data=lee08, subset=margin>0,
 
 In fuzzy RD, by Theorem B.2 in the appendix to @ArKo16honest, the estimator is asymptotically equivalent to
 \begin{equation*}
-\sum_i w_i (Y_i-D_i\beta)/fs,
+\sum_i w_i (Y_i-D_i\beta)/first stage,
 \end{equation*}
 so the Lindeberg weights are the same as in the sharp case.