Skip to content

Commit

Permalink
NN uses covariate-adjusted outcome
Browse files Browse the repository at this point in the history
  • Loading branch information
kolesarm committed Dec 12, 2024
1 parent 38a6cba commit 06acbf2
Show file tree
Hide file tree
Showing 8 changed files with 32 additions and 31 deletions.
15 changes: 10 additions & 5 deletions R/NPRfunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ NPReg <- function(d, h, kern="triangular", order=1, se.method="nn", J=3) {
be <- as.matrix(r0$coefficients)
if (any(is.na(be[1:Lz, ]))) {
return(list(estimate=0, se=NA, est_w=W*0, sigma2=NA*d$Y, eff.obs=0,
fs=NA, lm=r0, Z=Z))
fs=NA, lm=r0, Yadj=d$Y))
}
## If the collinearity comes from covariates, drop them
if (any(is.na(be[-(1:Lz), ]))) {
Expand All @@ -38,6 +38,11 @@ NPReg <- function(d, h, kern="triangular", order=1, se.method="nn", J=3) {
collapse=", "))
r0 <- stats::lm.wfit(x=Z, y=d$Y, w=W)
}
Yadj <- d$Y
if (NCOL(Z)> Lz) {
Yadj <- d$Y - Z[, -(1:Lz), drop=FALSE] %*%
as.matrix(r0$coefficients)[-(1:Lz), , drop=FALSE]
}

class(r0) <- c(if (ncol(d$Y)>1) "mlm", "lm")
wgt <- W*0
Expand All @@ -54,14 +59,14 @@ NPReg <- function(d, h, kern="triangular", order=1, se.method="nn", J=3) {
HC <- function(r) r[, rep(seq_len(ny), each=ny)] * r[, rep(seq_len(ny), ny)]

## For RD, compute variance separately on either side of cutoff
## TODO: better implement
## using covariate-adjusted outcome
NN <- function(X) {
res <- matrix(0, nrow=length(X), ncol=ny^2)
res[ok] <-
if (!inherits(d, "IP"))
rbind(as.matrix(sigmaNN(X[d$m & ok], d$Y[d$m & ok, ], J,
rbind(as.matrix(sigmaNN(X[d$m & ok], Yadj[d$m & ok, ], J,
d$w[d$m & ok])),
as.matrix(sigmaNN(X[d$p & ok], d$Y[d$p & ok, ], J,
as.matrix(sigmaNN(X[d$p & ok], Yadj[d$p & ok, ], J,
d$w[d$p & ok])))
else
sigmaNN(X[ok], d$Y[ok, ], J, d$w[ok])
Expand All @@ -83,7 +88,7 @@ NPReg <- function(d, h, kern="triangular", order=1, se.method="nn", J=3) {
V <- as.vector(crossprod(us))
}
ret <- list(estimate=r0$coefficients[1], se=sqrt(V[1]), est_w=wgt,
sigma2=hsigma2, eff.obs=eff.obs, fs=NA, lm=r0, Z=Z)
sigma2=hsigma2, eff.obs=eff.obs, fs=NA, lm=r0, Yadj=Yadj)

if (inherits(d, "FRD")) {
ret$fs <- r0$coefficients[1, 2]
Expand Down
17 changes: 5 additions & 12 deletions R/RDHonest.R
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,8 @@ RDHonest <- function(formula, data, subset, weights, cutoff=0, M,

if (missing(M)) {
M <- MROT(d)
message("Using Armstong & Kolesar (2020) ROT for smoothness constant M")
message(paste0("Using Armstrong & Kolesar (2020) ROT ",
"for smoothness constant M"))
}

if (kernel_type(kern)=="optimal") {
Expand All @@ -234,17 +235,9 @@ RDHonest <- function(formula, data, subset, weights, cutoff=0, M,
}

covariate_adjust <- function(d, kern, h) {
if (is.null(d$Y_unadj)) d$Y_unadj <- d$Y
d0 <- d
d0$Y <- d0$Y_unadj
r <- NPReg(d0, h, kern, order=1, se.method="EHW")
be <- as.matrix(r$lm$coefficients)
if (r$eff.obs==0)
stop("Too few effective observations to perform covariate adjustment")
## Original
## L <- NCOL(d$covs)
## d$Y <-d$Y_unadj-d$covs %*% be[seq_len(L)+NROW(be)-L, , drop=FALSE]
d$Y <-d$Y_unadj-r$Z[, -(1:4)] %*% be[-(1:4), , drop=FALSE]
r <- NPReg(d, h, kern, order=1, se.method="EHW")
d$Y_unadj <- d$Y
d$Y <- r$Yadj
d
}

Expand Down
2 changes: 1 addition & 1 deletion doc/RDHonest.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ RDHonest(log(cn) | retired ~ elig_year | education, data=rcp,
f3 <- RDScatter(log(cn)~elig_year, data=rcp, cutoff=0, avg=Inf,
xlab="Years to eligibility",
ylab="Log consumption of non-durables", propdotsize=TRUE,
subset=abs(elig_year)<10)
subset=abs(elig_year)<15)
## Adjust size of dots if they are too big
f3 + ggplot2::scale_size_area(max_size = 5)

Expand Down
8 changes: 4 additions & 4 deletions doc/RDHonest.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -726,16 +726,16 @@ RDHonest(log(cn) | retired ~ elig_year | education, data=rcp,
Relative to the previous estimate without covariates, the point estimate is now
much larger. This is in part due to slightly smaller bandwidth used, and the
regression function for the reduced form appears noisy below the cutoff,
potentially due to measurement error: see Figure 3. The noise is also
responsible for the rather large data-driven estimates of the curvature
parameters.
potentially due to measurement error: see Figure 3. As a result, the estimates
are quite sensitive to the bandwidth used. The noise is also responsible for the
rather large data-driven estimates of the curvature parameters.

```{r, fig.width=4.5, fig.height=3.5, fig.cap="Battistin et al (2009) data"}
## see Figure 3
f3 <- RDScatter(log(cn)~elig_year, data=rcp, cutoff=0, avg=Inf,
xlab="Years to eligibility",
ylab="Log consumption of non-durables", propdotsize=TRUE,
subset=abs(elig_year)<10)
subset=abs(elig_year)<15)
## Adjust size of dots if they are too big
f3 + ggplot2::scale_size_area(max_size = 5)
```
Expand Down
Binary file modified doc/RDHonest.pdf
Binary file not shown.
Binary file modified doc/manual.pdf
Binary file not shown.
19 changes: 11 additions & 8 deletions tests/testthat/test_covariates.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,21 +60,24 @@ test_that("Test collinear covariates", {
df <- headst[complete.cases(headst), ]
covlist <- "urban+black"
fh1 <- as.formula(paste0("mortHS ~ povrate|", covlist))
r0 <- RDHonest(fh1, data=df)
expect_message(r0 <- RDHonest(fh1, data=df), "Using Armstrong")
Z <- cbind(df$urban, df$black, df$urban)
expect_message(r1 <- RDHonest(df$mortHS~df$povrate|Z, M=r0$coefficients$M))
expect_message(expect_message(r1 <- RDHonest(df$mortHS~df$povrate|Z,
M=r0$coefficients$M)))
expect_lt(max(abs(r1$coefficients[2:13]-r0$coefficients[2:13])), 1e-15)
Z <- cbind(df$urban, df$urban, df$black, df$black, df$urban+df$black)
expect_message(r2 <- RDHonest(df$mortHS~df$povrate|Z))
suppressMessages(expect_message(r2 <- RDHonest(df$mortHS~df$povrate|Z)))
expect_lt(max(abs(r2$coefficients[2:13]-r0$coefficients[2:13])), 1e-15)
expect_error(RDHonest(df$mortHS~df$povrate|Z, h=0.05, kern="uniform"))
expect_message(expect_message(r <- RDHonest(df$mortHS~df$povrate|Z, h=0.05,
kern="uniform")), "large")

## Fuzzy
df <- rcp[1:1000, ]
Z <- cbind(df$food, df$food+df$family_size, df$family_size)
r0 <- RDHonest(log(c)|retired ~ elig_year|Z[, 1:2], data=df,
weights=survey_year)
expect_message(r1 <- RDHonest(log(c)|retired ~ elig_year|Z, data=df,
weights=survey_year))
expect_message(r0 <- RDHonest(log(c)|retired ~ elig_year|Z[, 1:2], data=df,
weights=survey_year), "Using Armstrong")
suppressMessages(expect_message(r1 <- RDHonest(log(c)|retired ~ elig_year|Z,
data=df,
weights=survey_year)))
expect_lt(max(abs(r1$coefficients[2:13]-r0$coefficients[2:13])), 1e-15)
})
2 changes: 1 addition & 1 deletion vignettes/RDHonest.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -700,7 +700,7 @@ rc
We see that the inclusion of covariates leads to a reduction in the
rule-of-thumb curvature and also smaller standard errors (this would be true
even if the bandwidth was kept fixed). Correspondingly, the CIs are tighter by
about 6 percentage points:
about 9 percentage points:
```{r}
ci_len <- c(rc$coefficients$conf.high-rc$coefficients$conf.low,
rn$coefficients$conf.high-rn$coefficients$conf.low)
Expand Down

0 comments on commit 06acbf2

Please sign in to comment.