diff --git a/R/NPRfunctions.R b/R/NPRfunctions.R
index 8576a5f..97d2534 100644
--- a/R/NPRfunctions.R
+++ b/R/NPRfunctions.R
@@ -27,7 +27,7 @@ NPReg <- function(d, h, kern="triangular", order=1, se.method="nn", J=3) {
     be <- as.matrix(r0$coefficients)
     if (any(is.na(be[1:Lz, ]))) {
         return(list(estimate=0, se=NA, est_w=W*0, sigma2=NA*d$Y, eff.obs=0,
-                    fs=NA, lm=r0, Z=Z))
+                    fs=NA, lm=r0, Yadj=d$Y))
     }
     ## If the collinearity comes from covariates, drop them
     if (any(is.na(be[-(1:Lz), ]))) {
@@ -38,6 +38,11 @@ NPReg <- function(d, h, kern="triangular", order=1, se.method="nn", J=3) {
                       collapse=", "))
         r0 <- stats::lm.wfit(x=Z, y=d$Y, w=W)
     }
+    Yadj <- d$Y
+    if (NCOL(Z)> Lz) {
+        Yadj <- d$Y - Z[, -(1:Lz), drop=FALSE] %*%
+            as.matrix(r0$coefficients)[-(1:Lz), , drop=FALSE]
+    }
 
     class(r0) <- c(if (ncol(d$Y)>1) "mlm", "lm")
     wgt <- W*0
@@ -54,14 +59,14 @@ NPReg <- function(d, h, kern="triangular", order=1, se.method="nn", J=3) {
     HC <- function(r) r[, rep(seq_len(ny), each=ny)] * r[, rep(seq_len(ny), ny)]
 
     ## For RD, compute variance separately on either side of cutoff
-    ## TODO: better implement
+    ## using covariate-adjusted outcome
     NN <- function(X) {
         res <- matrix(0, nrow=length(X), ncol=ny^2)
         res[ok] <-
             if (!inherits(d, "IP"))
-                rbind(as.matrix(sigmaNN(X[d$m & ok], d$Y[d$m & ok, ], J,
+                rbind(as.matrix(sigmaNN(X[d$m & ok], Yadj[d$m & ok, ], J,
                                         d$w[d$m & ok])),
-                      as.matrix(sigmaNN(X[d$p & ok], d$Y[d$p & ok, ], J,
+                      as.matrix(sigmaNN(X[d$p & ok], Yadj[d$p & ok, ], J,
                                         d$w[d$p & ok])))
             else
                 sigmaNN(X[ok], d$Y[ok, ], J, d$w[ok])
@@ -83,7 +88,7 @@ NPReg <- function(d, h, kern="triangular", order=1, se.method="nn", J=3) {
         V <- as.vector(crossprod(us))
     }
     ret <- list(estimate=r0$coefficients[1], se=sqrt(V[1]), est_w=wgt,
-                sigma2=hsigma2, eff.obs=eff.obs, fs=NA, lm=r0, Z=Z)
+                sigma2=hsigma2, eff.obs=eff.obs, fs=NA, lm=r0, Yadj=Yadj)
 
     if (inherits(d, "FRD")) {
         ret$fs <- r0$coefficients[1, 2]
diff --git a/R/RDHonest.R b/R/RDHonest.R
index ed3df90..0917d55 100644
--- a/R/RDHonest.R
+++ b/R/RDHonest.R
@@ -210,7 +210,8 @@ RDHonest <- function(formula, data, subset, weights, cutoff=0, M,
 
     if (missing(M)) {
         M <- MROT(d)
-        message("Using Armstong & Kolesar (2020) ROT for smoothness constant M")
+        message(paste0("Using Armstrong & Kolesar (2020) ROT ",
+                       "for smoothness constant M"))
     }
 
     if (kernel_type(kern)=="optimal") {
@@ -234,17 +235,9 @@ RDHonest <- function(formula, data, subset, weights, cutoff=0, M,
 }
 
 covariate_adjust <- function(d, kern, h) {
-    if (is.null(d$Y_unadj)) d$Y_unadj <- d$Y
-    d0 <- d
-    d0$Y <- d0$Y_unadj
-    r <- NPReg(d0, h, kern, order=1, se.method="EHW")
-    be <- as.matrix(r$lm$coefficients)
-    if (r$eff.obs==0)
-        stop("Too few effective observations to perform covariate adjustment")
-    ## Original
-    ## L <- NCOL(d$covs)
-    ## d$Y <-d$Y_unadj-d$covs %*% be[seq_len(L)+NROW(be)-L, , drop=FALSE]
-    d$Y <-d$Y_unadj-r$Z[, -(1:4)] %*% be[-(1:4), , drop=FALSE]
+    r <- NPReg(d, h, kern, order=1, se.method="EHW")
+    d$Y_unadj <- d$Y
+    d$Y <- r$Yadj
     d
 }
 
diff --git a/doc/RDHonest.R b/doc/RDHonest.R
index 9a1a9f2..2ed77e4 100644
--- a/doc/RDHonest.R
+++ b/doc/RDHonest.R
@@ -90,7 +90,7 @@ RDHonest(log(cn) | retired ~ elig_year | education, data=rcp,
 f3 <- RDScatter(log(cn)~elig_year, data=rcp, cutoff=0, avg=Inf,
                 xlab="Years to eligibility",
                 ylab="Log consumption of non-durables", propdotsize=TRUE,
-                subset=abs(elig_year)<10)
+                subset=abs(elig_year)<15)
 ## Adjust size of dots if they are too big
 f3 + ggplot2::scale_size_area(max_size = 5)
 
diff --git a/doc/RDHonest.Rmd b/doc/RDHonest.Rmd
index 60a0596..11bcbed 100644
--- a/doc/RDHonest.Rmd
+++ b/doc/RDHonest.Rmd
@@ -726,16 +726,16 @@ RDHonest(log(cn) | retired ~ elig_year | education, data=rcp,
 Relative to the previous estimate without covariates, the point estimate is now
 much larger. This is in part due to slightly smaller bandwidth used, and the
 regression function for the reduced form appears noisy below the cutoff,
-potentially due to measurement error: see Figure 3. The noise is also
-responsible for the rather large data-driven estimates of the curvature
-parameters.
+potentially due to measurement error: see Figure 3. As a result, the estimates
+are quite sensitive to the bandwidth used. The noise is also responsible for the
+rather large data-driven estimates of the curvature parameters.
 
 ```{r, fig.width=4.5, fig.height=3.5, fig.cap="Battistin et al (2009) data"}
 ## see Figure 3
 f3 <- RDScatter(log(cn)~elig_year, data=rcp, cutoff=0, avg=Inf,
                 xlab="Years to eligibility",
                 ylab="Log consumption of non-durables", propdotsize=TRUE,
-                subset=abs(elig_year)<10)
+                subset=abs(elig_year)<15)
 ## Adjust size of dots if they are too big
 f3 + ggplot2::scale_size_area(max_size = 5)
 ```
diff --git a/doc/RDHonest.pdf b/doc/RDHonest.pdf
index a62fb06..b9299aa 100644
Binary files a/doc/RDHonest.pdf and b/doc/RDHonest.pdf differ
diff --git a/doc/manual.pdf b/doc/manual.pdf
index 31cc9a0..68f7fcf 100644
Binary files a/doc/manual.pdf and b/doc/manual.pdf differ
diff --git a/tests/testthat/test_covariates.R b/tests/testthat/test_covariates.R
index 6d8c112..a9b0ad3 100644
--- a/tests/testthat/test_covariates.R
+++ b/tests/testthat/test_covariates.R
@@ -60,21 +60,24 @@ test_that("Test collinear covariates", {
     df <- headst[complete.cases(headst), ]
     covlist <- "urban+black"
     fh1 <- as.formula(paste0("mortHS ~ povrate|", covlist))
-    r0 <- RDHonest(fh1, data=df)
+    expect_message(r0 <- RDHonest(fh1, data=df), "Using Armstrong")
     Z <- cbind(df$urban, df$black, df$urban)
-    expect_message(r1 <- RDHonest(df$mortHS~df$povrate|Z, M=r0$coefficients$M))
+    expect_message(expect_message(r1 <- RDHonest(df$mortHS~df$povrate|Z,
+                                                 M=r0$coefficients$M)))
     expect_lt(max(abs(r1$coefficients[2:13]-r0$coefficients[2:13])), 1e-15)
     Z <- cbind(df$urban, df$urban, df$black, df$black, df$urban+df$black)
-    expect_message(r2 <- RDHonest(df$mortHS~df$povrate|Z))
+    suppressMessages(expect_message(r2 <- RDHonest(df$mortHS~df$povrate|Z)))
     expect_lt(max(abs(r2$coefficients[2:13]-r0$coefficients[2:13])), 1e-15)
-    expect_error(RDHonest(df$mortHS~df$povrate|Z, h=0.05, kern="uniform"))
+    expect_message(expect_message(r <- RDHonest(df$mortHS~df$povrate|Z, h=0.05,
+                                                kern="uniform")), "large")
 
     ## Fuzzy
     df <- rcp[1:1000, ]
     Z <- cbind(df$food, df$food+df$family_size, df$family_size)
-    r0 <- RDHonest(log(c)|retired ~ elig_year|Z[, 1:2], data=df,
-                   weights=survey_year)
-    expect_message(r1 <- RDHonest(log(c)|retired ~ elig_year|Z, data=df,
-                                  weights=survey_year))
+    expect_message(r0 <- RDHonest(log(c)|retired ~ elig_year|Z[, 1:2], data=df,
+                                  weights=survey_year), "Using Armstrong")
+    suppressMessages(expect_message(r1 <- RDHonest(log(c)|retired ~ elig_year|Z,
+                                                   data=df,
+                                                   weights=survey_year)))
     expect_lt(max(abs(r1$coefficients[2:13]-r0$coefficients[2:13])), 1e-15)
 })
diff --git a/vignettes/RDHonest.Rmd b/vignettes/RDHonest.Rmd
index 11bcbed..80b6796 100644
--- a/vignettes/RDHonest.Rmd
+++ b/vignettes/RDHonest.Rmd
@@ -700,7 +700,7 @@ rc
 We see that the inclusion of covariates leads to a reduction in the
 rule-of-thumb curvature and also smaller standard errors (this would be true
 even if the bandwidth was kept fixed). Correspondingly, the CIs are tighter by
-about 6 percentage points:
+about 9 percentage points:
 ```{r}
 ci_len <- c(rc$coefficients$conf.high-rc$coefficients$conf.low,
             rn$coefficients$conf.high-rn$coefficients$conf.low)