Merge pull request #27 from fate-ewi/dev
remove weights, rename to inv_var_weights -- and add a second type of…
ericward-noaa authored Dec 7, 2023
2 parents f13016d + ccfe29d commit e0bbe7f
Showing 7 changed files with 998 additions and 965 deletions.
47 changes: 34 additions & 13 deletions R/fit_dfa.R
@@ -79,9 +79,16 @@
#' of diagnostic functions -- but making the models a lot larger for storage. Finally, this argument may be a custom string
#' of parameters to monitor, e.g. c("x","sigma")
#' @param verbose Whether to print iterations and information from Stan, defaults to FALSE.
#' @param weights Optional name of "weights" argument in data frame. This is only implemented when data
#' are in long format. If not entered, defaults to weights = 1 for all observations. The implementation of weights
#' varies slightly by family: Gaussian family models use -log(w_i) in the dispersion formula
#' @param inv_var_weights Optional name of the inverse variance weights column in the data frame. This is only implemented when data
#' are in long format. If not entered, defaults to inv_var_weights = 1 for all observations. The implementation of inv_var_weights
#' relies on inverse variance weighting, so that if you have standard errors associated with each observation,
#' the inverse variance weights are calculated as inv_var_weights <- 1 / (standard_errors^2). The observation error sigma
#' in the likelihood then becomes sigma / sqrt(inv_var_weights).
#' @param likelihood_weights Optional name of the likelihood weights column in the data frame. These
#' are used in the same way weights are implemented in the packages `glmmTMB`, `brms`, `sdmTMB`, etc.
#' Weights act as multipliers on the log-likelihood, so higher weights allow observations
#' to contribute more. Currently only implemented for univariate distributions, when data are in long
#' format.
#' @param ... Any other arguments to pass to [rstan::sampling()].
#' @details Note that there is nothing restricting the loadings and trends from
#' being inverted (i.e. multiplied by `-1`) for a given chain. Therefore, if
@@ -187,7 +194,8 @@ fit_dfa <- function(y = y,
                    par_list = NULL,
                    family = "gaussian",
                    verbose = FALSE,
                    weights = NULL,
                    inv_var_weights = NULL,
                    likelihood_weights = NULL,
                    gp_theta_prior = c(3, 1),
                    expansion_prior = FALSE,
                    ...) {
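
A hedged usage sketch of the two new arguments (the long-format data frame `df` and its columns `se` and `w` are hypothetical, not part of this commit):

library(bayesdfa)

# Inverse variance weights computed from per-observation standard errors;
# the observation error for row i then becomes sigma / sqrt(ivw[i]):
df$ivw <- 1 / (df$se^2)
fit1 <- fit_dfa(y = df, data_shape = "long", inv_var_weights = "ivw")

# Likelihood weights, as in glmmTMB / brms / sdmTMB: each row's
# log-likelihood contribution is multiplied by w[i]:
fit2 <- fit_dfa(y = df, data_shape = "long", likelihood_weights = "w")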
@@ -234,9 +242,14 @@ fit_dfa <- function(y = y,
    y$ts <- as.numeric(as.factor(y[["ts"]]))
    N <- max(y[["time"]])
    P <- max(y[["ts"]])
    if (!is.null(weights)) {
      if(weights %in% names(y) == FALSE) {
        stop("Error: weight name is not found in long data frame")
    if (!is.null(inv_var_weights)) {
      if(inv_var_weights %in% names(y) == FALSE) {
        stop("Error: inverse variance weight name is not found in long data frame")
      }
    }
    if (!is.null(likelihood_weights)) {
      if(likelihood_weights %in% names(y) == FALSE) {
        stop("Error: likelihood weight name is not found in long data frame")
      }
    }
  }
@@ -333,7 +346,8 @@ fit_dfa <- function(y = y,
  }
  nVariances <- length(unique(varIndx))

  weights_vec <- NULL
  inv_var_weights_vec <- NULL
  likelihood_weights_vec <- NULL
  # indices of positive values - Stan can't handle NAs
  if (data_shape[1] == "wide") {
    row_indx_pos <- matrix(rep(seq_len(P), N), P, N)[!is.na(y)]
@@ -343,7 +357,8 @@
    col_indx_na <- matrix(sort(rep(seq_len(N), P)), P, N)[is.na(y)]
    n_na <- length(row_indx_na)
    y <- y[!is.na(y)]
    weights_vec <- rep(1, length(y))
    inv_var_weights_vec <- rep(1, length(y))
    likelihood_weights_vec <- rep(1, length(y))
    if(!is.null(offset)) {
      stop("Error: if offset is included, data shape must be long")
    }
@@ -358,10 +373,15 @@
    col_indx_na <- matrix(1, 1, 1)[is.na(runif(1))]
    n_na <- length(row_indx_na)

    if(!is.null(weights)) {
      weights_vec <- y[[weights]]
    if(!is.null(inv_var_weights)) {
      inv_var_weights_vec <- y[[inv_var_weights]]
    } else {
      inv_var_weights_vec <- rep(1, nrow(y))
    }
    if(!is.null(likelihood_weights)) {
      likelihood_weights_vec <- y[[likelihood_weights]]
    } else {
      weights_vec <- rep(1, nrow(y))
      likelihood_weights_vec <- rep(1, nrow(y))
    }

    offset_vec <- rep(0, nrow(y))
@@ -543,7 +563,8 @@ fit_dfa <- function(y = y,
    gp_theta_prior = gp_theta_prior,
    use_expansion_prior = as.integer(expansion_prior),
    input_offset = offset_vec,
    weights_vec = weights_vec
    inv_var_weights_vec = sqrt(1.0/inv_var_weights_vec),
    weights_vec = likelihood_weights_vec
  )

  if (is.null(par_list)) {
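
One detail worth flagging in the data list above: R passes sqrt(1.0/inv_var_weights_vec) to Stan under the name `inv_var_weights_vec`, so when Stan multiplies that quantity by sigma, the likelihood standard deviation becomes sigma / sqrt(w), matching the roxygen documentation. A quick algebra check in R (illustrative values only):

w <- c(1, 4, 0.25)          # example inverse variance weights
stan_side <- sqrt(1.0 / w)  # the quantity passed to Stan
sigma <- 0.5
all.equal(sigma * stan_side, sigma / sqrt(w))  # TRUE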
23 changes: 10 additions & 13 deletions inst/stan/dfa.stan
@@ -121,22 +121,19 @@ data {
  int<lower=0, upper=1> est_nb2_params;
  int<lower=0, upper=1> use_expansion_prior;
  array[2] real gp_theta_prior;
  array[n_pos] real inv_var_weights_vec;
  array[n_pos] real weights_vec;
}
transformed data {
  int n_pcor; // dimension for cov matrix
  int n_loglik; // dimension for loglik calculation
  vector[K] zeros;
  array[N] real data_locs; // for gp model
  array[n_pos] real log_weights_vec; // weights
  vector[K*proportional_model] alpha_vec;
  vector[n_knots] muZeros;
  real gp_delta = 1e-9; // stabilizing value for GP model
  real lower_bound_z;

  for(i in 1:n_pos) {
    log_weights_vec[i] = log(weights_vec[i]);
  }
  for(i in 1:N) {
    data_locs[i] = i;
  }
@@ -579,19 +576,19 @@ model {
    if(long_format==0) {
      if(obs_model == 1) {for(i in 1:P) target += normal_lpdf(yall[i] | pred[i], sigma_vec[i]);} // gaussian
    } else {
      if(obs_model == 1) {for(i in 1:n_pos) target += normal_lpdf(y[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i], exp(log(sigma_vec[row_indx_pos[i]]) - log_weights_vec[i]));}
      if(obs_model == 2) {for(i in 1:n_pos) target += gamma_lpdf(y[i] | gamma_a_vec[row_indx_pos[i]], gamma_a_vec[row_indx_pos[i]] / exp(input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i]));} // gamma
      if(obs_model == 3) {for(i in 1:n_pos) target += poisson_log_lpmf(y_int[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i]);} // poisson
      if(obs_model == 4) {for(i in 1:n_pos) target += neg_binomial_2_log_lpmf(y_int[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i], nb_phi_vec[row_indx_pos[i]]);} // negbin
      if(obs_model == 5) {for(i in 1:n_pos) target += bernoulli_logit_lpmf(y_int[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i]);} // binomial
      if(obs_model == 6) {for(i in 1:n_pos) target += lognormal_lpdf(y[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i], sigma_vec[row_indx_pos[i]]);} // lognormal
      if(obs_model == 1) {for(i in 1:n_pos) target += weights_vec[i] * normal_lpdf(y[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i], sigma_vec[row_indx_pos[i]] * inv_var_weights_vec[i]);}
      if(obs_model == 2) {for(i in 1:n_pos) target += weights_vec[i] * gamma_lpdf(y[i] | gamma_a_vec[row_indx_pos[i]], gamma_a_vec[row_indx_pos[i]] / exp(input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i]));} // gamma
      if(obs_model == 3) {for(i in 1:n_pos) target += weights_vec[i] * poisson_log_lpmf(y_int[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i]);} // poisson
      if(obs_model == 4) {for(i in 1:n_pos) target += weights_vec[i] * neg_binomial_2_log_lpmf(y_int[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i], nb_phi_vec[row_indx_pos[i]]);} // negbin
      if(obs_model == 5) {for(i in 1:n_pos) target += weights_vec[i] * bernoulli_logit_lpmf(y_int[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i]);} // binomial
      if(obs_model == 6) {for(i in 1:n_pos) target += weights_vec[i] * lognormal_lpdf(y[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i], sigma_vec[row_indx_pos[i]]);} // lognormal
    }
  } else {
    // need to loop over time slices / columns - each ~ MVN
    if(long_format==0) {
      if(obs_model == 1) {for(i in 1:N) target += multi_normal_cholesky_lpdf(col(yall,i) | col(pred,i), diag_pre_multiply(sigma_vec, Lcorr));}
    } else {
      if(obs_model == 1) {for(i in 1:n_pos) target += normal_lpdf(y[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i] + cond_mean_vec[row_indx_pos[i]], exp(log(cond_sigma_vec[row_indx_pos[i]]) - log_weights_vec[i]));}
      if(obs_model == 1) {for(i in 1:n_pos) target += weights_vec[i] * normal_lpdf(y[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i] + cond_mean_vec[row_indx_pos[i]], cond_sigma_vec[row_indx_pos[i]] * inv_var_weights_vec[i]);}
    }
  }
}
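
The multiplicative weighting in the model block has a simple interpretation: a likelihood weight of 2 adds the same amount to the target as including that observation twice. A minimal illustration in R (arbitrary values):

y_i <- 1.2; mu <- 1.0; sigma <- 0.3
w_contrib   <- 2 * dnorm(y_i, mu, sigma, log = TRUE)
dup_contrib <- sum(dnorm(c(y_i, y_i), mu, sigma, log = TRUE))
all.equal(w_contrib, dup_contrib)  # TRUE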
@@ -622,7 +619,7 @@ generated quantities {
      }
    }
  } else {
    if(obs_model == 1) {for(i in 1:n_pos) log_lik[i] = normal_lpdf(y[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i], exp(log(sigma_vec[row_indx_pos[i]]) - log_weights_vec[i]));}
    if(obs_model == 1) {for(i in 1:n_pos) log_lik[i] = normal_lpdf(y[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i], sigma_vec[row_indx_pos[i]] * inv_var_weights_vec[i]);}
    if(obs_model == 2) {for(i in 1:n_pos) log_lik[i] = gamma_lpdf(y[i] | gamma_a_vec[row_indx_pos[i]], gamma_a_vec[row_indx_pos[i]] / exp(input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i]));} // gamma
    if(obs_model == 3) {for(i in 1:n_pos) log_lik[i] = poisson_log_lpmf(y_int[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i]);} // poisson
    if(obs_model == 4) {for(i in 1:n_pos) log_lik[i] = neg_binomial_2_log_lpmf(y_int[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i], nb_phi_vec[row_indx_pos[i]]);} // negbin
@@ -639,7 +636,7 @@ generated quantities {
  } else {
    for(i in 1:n_pos) {
      // row_indx_pos[i] is the time series, col_indx_pos[i] is the time
      log_lik[i] = normal_lpdf(y[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i] + cond_mean_vec[row_indx_pos[i]], exp(log(cond_sigma_vec[row_indx_pos[i]]) - log_weights_vec[i]));
      log_lik[i] = normal_lpdf(y[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i] + cond_mean_vec[row_indx_pos[i]], cond_sigma_vec[row_indx_pos[i]] * inv_var_weights_vec[i]);
    }
  }
}
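
Note that the pointwise `log_lik` in generated quantities keeps the inverse variance scaling of sigma but is not multiplied by the likelihood weights. If `log_lik` is among the monitored parameters, it can be passed to the `loo` package in the usual way (a sketch; assumes the fitted object stores the stanfit as `fit$model`):

library(loo)
ll <- extract_log_lik(fit$model, parameter_name = "log_lik", merge_chains = FALSE)
r_eff <- relative_eff(exp(ll))
print(loo(ll, r_eff = r_eff))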
17 changes: 13 additions & 4 deletions man/fit_dfa.Rd

Some generated files are not rendered by default.
