Fix offset #14

seananderson · Oct 15, 2021 · 37e8c02 · 37e8c02
1 parent 34afff4
commit 37e8c02
Show file tree

Hide file tree

Showing 6 changed files with 27 additions and 15 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -62,6 +62,6 @@ VignetteBuilder:
     knitr
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.1.1
+RoxygenNote: 7.1.2
 SystemRequirements: GNU make
 Biarch: true
diff --git a/R/fitting.R b/R/fitting.R
@@ -5,10 +5,7 @@
 #' through time. It is also possible to fit a spatial random fields model
 #' without a time component.
 #'
-#' @param formula The model formula. This is generally similar to formulas used
-#' in other packages, such as [lm()] or [glm()] with the exception of how the offset
-#' is handled. To include the offset, a named column of the dataframe must be
-#' `offset`, and the formula needs to include `+ offset`.
+#' @param formula The model formula.
 #' @param data A data frame.
 #' @param time A character object giving the name of the time column. Leave
 #'   as `NULL` to fit a spatial GLMM without a time element.
@@ -94,6 +91,7 @@
 #' @param cluster The type of clustering algorithm used to determine the knot
 #'   locations. `"pam"` = [cluster::pam()]. The `"kmeans"`
 #'   algorithm will be faster on larger datasets.
+#' @param offset An optional offset vector.
 #' @param ... Any other arguments to pass to [rstan::sampling()].
 #'
 #' @details
@@ -174,6 +172,7 @@ glmmfields <- function(formula, data, lon, lat,
                        save_log_lik = FALSE,
                        df_lower_bound = 2,
                        cluster = c("pam", "kmeans"),
+                       offset = NULL,
                        ...) {
 
   # argument checks:
@@ -222,7 +221,6 @@ glmmfields <- function(formula, data, lon, lat,
   y <- model.response(mf, "numeric")
   fixed_intercept <- ncol(X) == 0
 
-  offset <- as.vector(model.offset(mf))
   if (is.null(offset)) offset <- rep(0, length(y))
 
   if (is.null(time)) {
@@ -331,6 +329,7 @@ glmmfields <- function(formula, data, lon, lat,
     lon = lon, lat = lat,
     time = time, year_re = year_re,
     station = data_list$stationID, obs_model = obs_model,
+    offset = offset,
     fixed_intercept = fixed_intercept, family = family
   )
   out <- structure(out, class = "glmmfields")

diff --git a/R/predict.R b/R/predict.R
@@ -15,6 +15,7 @@
 #'   "response" scale (Same as for [stats::predict.glm()]).
 #' @param return_mcmc Logical. Should the full MCMC draws be returned for the
 #'   predictions?
+#' @param offset Optional offset vector to be used in prediction.
 #' @param iter Number of MCMC iterations to draw. Defaults to all.
 #' @param ... Ignored currently
 #'
@@ -127,6 +128,7 @@ predict.glmmfields <- function(object, newdata = NULL,
                                interval = c("confidence", "prediction"),
                                type = c("link", "response"),
                                return_mcmc = FALSE,
+                               offset = NULL,
                                iter = "all", ...) {
   estimate_method <- match.arg(estimate_method)
   interval <- match.arg(interval)
@@ -143,6 +145,11 @@ predict.glmmfields <- function(object, newdata = NULL,
 
   obs_model <- object$obs_model
 
+  if (is.null(newdata))  offset <- object$offset
+  if (!is.null(newdata) && !is.null(object$offset)) {
+    if (is.null(offset)) stop("Missing `offset` argument.", call. = FALSE)
+  }
+
   # newdata is df with time, y, lon, lat
   # if null, defaults to data used to fit model
   if (is.null(newdata)) newdata <- object$data
@@ -265,6 +272,8 @@ predict.glmmfields <- function(object, newdata = NULL,
   # generate (1) confidence intervals on mean or (2) prediction intervals
   # including obs error
 
+  pred_values <- pred_values + offset
+
   if (type == "response") {
     # gamma or NB2 or poisson:
     if (obs_model %in% c(0, 2, 5, 6)) pred_values <- exp(pred_values)

diff --git a/man/glmmfields.Rd b/man/glmmfields.Rd
diff --git a/man/predict.Rd b/man/predict.Rd
diff --git a/tests/testthat/test-fit-basic.R b/tests/testthat/test-fit-basic.R
@@ -261,7 +261,7 @@ test_that("A offset in formula works", {
     covariance = "squared-exponential"
   )
   # print(s$plot)
-  s$dat$offset = 0
+  s$dat$offset <- rnorm(nrow(s$dat), mean = 0, sd = 0.1)
 
   m <- glmmfields(y ~ 1,
                   data = s$dat, time = "time",
@@ -270,7 +270,7 @@ test_that("A offset in formula works", {
                   estimate_df = FALSE, fixed_df_value = df,
                   covariance = "squared-exponential"
   )
-  m_offset <- glmmfields(y ~ 1 + offset,
+  m_offset <- glmmfields(y ~ 1, offset = s$dat$offset,
                   data = s$dat, time = "time",
                   lat = "lat", lon = "lon", nknots = nknots,
                   iter = ITER, chains = CHAINS, seed = SEED,
@@ -280,7 +280,8 @@ test_that("A offset in formula works", {
   b <- tidy(m, estimate.method = "median")
   b_offset <- tidy(m_offset, estimate.method = "median")
 
-  expect_equal(as.numeric(b[b$term == "sigma[1]", "estimate", drop = TRUE]), as.numeric(b_offset[b_offset$term == "sigma[1]", "estimate", drop = TRUE]), tol = sigma * TOL)
-  expect_equal(as.numeric(b[b$term == "gp_sigma", "estimate", drop = TRUE]), as.numeric(b_offset[b_offset$term == "gp_sigma", "estimate", drop = TRUE]), tol = gp_sigma * TOL * 1.5)
-  expect_equal(as.numeric(b[b$term == "gp_theta", "estimate", drop = TRUE]), as.numeric(b_offset[b_offset$term == "gp_theta", "estimate", drop = TRUE]), tol = gp_theta * TOL)
+  p1 <- predict(m_offset)
+  p2 <- predict(m_offset, newdata = s$dat, offset = s$dat$offset)
+  expect_identical(p1, p2)
+  expect_error(p3 <- predict(m_offset, newdata = s$dat), regexp = "offset")
 })