Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also . Learn more about diff comparisons here.
base repository: cjcarlson/embarcadero
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: v1.1.0.0001
Choose a base ref
...
head repository: cjcarlson/embarcadero
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: master
Choose a head ref
Loading
Showing with 594 additions and 4,196 deletions.
  1. BIN .DS_Store
  2. +3 −3 DESCRIPTION
  3. +2 −1 NAMESPACE
  4. +35 −1 R/bart.step.R
  5. +5 −2 R/bartflex.R
  6. +0 −33 R/bigstack.R
  7. +1 −0 R/embarcadero-package.R
  8. +161 −0 R/multipartial.R
  9. +10 −2 R/partials.R
  10. +0 −53 R/plot_mcmc.R
  11. +1 −1 R/plot_ri.R
  12. +8 −7 R/predict.R
  13. +53 −0 R/retune.R
  14. +29 −17 R/spartials.R
  15. +44 −12 R/variable.step.R
  16. +30 −4 R/varimp.R
  17. +27 −1 R/varimp.diag.R
  18. +5 −5 R/zzz.R
  19. +33 −11 README.md
  20. +0 −207 inst/doc/virtualbart.R
  21. +0 −337 inst/doc/virtualbart.Rmd
  22. +0 −911 inst/doc/virtualbart.html
  23. +14 −4 man/bart.flex.Rd
  24. +10 −2 man/bart.step.Rd
  25. +0 −16 man/bigstack.Rd
  26. +66 −0 man/multipartial.Rd
  27. +11 −2 man/partial.Rd
  28. +0 −52 man/plot.mcmc.Rd
  29. +10 −2 man/predict2.bart.Rd
  30. +16 −0 man/retune.Rd
  31. +10 −3 man/spartial.Rd
  32. +8 −2 man/variable.step.Rd
  33. +2 −0 man/varimp.Rd
  34. +0 −337 vignettes/virtualbart.Rmd
  35. +0 −2,168 vignettes/virtualbart.html
Binary file added .DS_Store
Binary file not shown.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
Package: embarcadero
Title: Species distribution models with BART
Version: 1.1.0.0001
Version: 1.2.0.1003
Authors@R: person("Colin", "Carlson", email = "colin.carlson@georgetown.edu", role = c("aut", "cre"))
Description: A convenience tool package for running and analyzing species distribution models with Bayesian additive regression trees.
License: CC0
Encoding: UTF-8
LazyData: true
Depends: raster, dbarts, Metrics, tidyverse, dismo, ROCR, patchwork,
velox, ggpubr, matrixStats, data.table
RoxygenNote: 6.1.1
ggpubr, matrixStats, data.table, ggplot2
RoxygenNote: 7.2.3
Suggests: knitr, rmarkdown
VignetteBuilder: knitr
NeedsCompilation: no
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -3,11 +3,12 @@
S3method(summary,bart)
export(bart.flex)
export(bart.step)
export(bigstack)
export(multipartial)
export(partial)
export(plot.mcmc)
export(plot.ri)
export(predict2.bart)
export(retune)
export(spartial)
export(theme_bluewhite)
export(variable.step)
36 changes: 35 additions & 1 deletion R/bart.step.R
Original file line number Diff line number Diff line change
@@ -23,7 +23,41 @@ bart.step <- function(x.data, y.data, ri.data=NULL,
iter.plot=100,
full=FALSE,
quiet=FALSE) {


###############

# auto-drops

quietly <- function(x) {
sink(tempfile())
on.exit(sink())
invisible(force(x))
} # THANKS HADLEY

quietly(model.0 <- bart.flex(x.data = x.data, y.data = y.data,
ri.data = ri.data,
n.trees = 200))

if(class(model.0)=='rbart') {
fitobj <- model.0$fit[[1]]
}
if(class(model.0)=='bart') {
fitobj <- model.0$fit
}

dropnames <- colnames(x.data)[!(colnames(x.data) %in% names(which(unlist(attr(fitobj$data@x,"drop"))==FALSE)))]

if(length(dropnames)==0) {} else{
message("Some of your variables have been automatically dropped by dbarts.")
message("(This could be because they're characters, homogenous, etc.)")
message("It is strongly recommended that you remove these from the raw data:")
message(paste(dropnames,collapse = ' '), ' \n')
}

x.data %>% dplyr::select(-dropnames) -> x.data

###############

quiet2 <- quiet
if(full==TRUE){varimp.diag(x.data, y.data, ri.data, iter=iter.plot, quiet=quiet2)}
vs <- variable.step(x.data, y.data, ri.data, n.trees=tree.step, iter=iter.step, quiet=quiet2)
7 changes: 5 additions & 2 deletions R/bartflex.R
Original file line number Diff line number Diff line change
@@ -2,11 +2,13 @@
#'
#' @description
#'
#' A little internal piece of code that allows you to easily drop random intercepts in. Not really for anyone but me :)
#' A little internal piece of code that allows you to easily drop random intercepts in. It's a convenience tool I don't suggest using - it's a shortcut for if you feel very comfortable with the pipeline.
#'
#' @param x.data A data frame of covariates
#' @param y.data A vector of outcomes (1/0)
#' @param ri.data Random intercept data
#' @param ri.data Random intercept data (a column in a data frame)
#' @param ri.name Identify the name of the random intercept column
#' @param y.name Rename outcome variable from training data if you need to
#'
#' @export
#'
@@ -44,3 +46,4 @@ bart.flex <- function(x.data, y.data, ri.data = NULL,
}
return(model)
}

33 changes: 0 additions & 33 deletions R/bigstack.R

This file was deleted.

1 change: 1 addition & 0 deletions R/embarcadero-package.R
Original file line number Diff line number Diff line change
@@ -14,6 +14,7 @@
#' @import Metrics
#' @import tidyverse
#' @import dismo
#' @import ggplot2
#'
#'
#'
161 changes: 161 additions & 0 deletions R/multipartial.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#' @title Multispecies partial plots (in development)
#'
#' @description
#'
#' Partial dependence plots show the response curves of an individual variable in the sum-of-trees models. The main line is the average of partial dependence plots for each posterior draw of sum-of-trees models; each of those curves is generated by evaluating the BART model prediction at each specified x value for *each other combination of other x values in the data*. This is obviously computationally very expensive, and gets slower to run depending on: how much smooth you add, how many variables you ask for, and more posterior draws (ndpost; defaults to 1000) in the bart() function.
#'
#' This is a somewhat altered and simplified version of partial() that allows you to combine different species' responses to the same variables on the same axes. Run the models separately, then use this function to combine them.
#'
#' @param model A dbarts model object
#' @param x.vars A list of the variables for which you want to run the partials. Defaults to doing all of them.
#' @param smooth A multiplier for how much smoother you want the sampling of the levels to be. High values, like 10 or over, are obviously much slower and don't add much.
#' @param trace Traceplots for each individual draw from the posterior
#' @param transform This converts from the logit output of dbarts:::predict to actual 0 to 1 probabilities. I wouldn't turn this off unless you're really interested in a deep dive on the model.
#' @param panels For multiple variables, use this to create a multipanel figure.
#'
#'
#' @return Returns a ggplot object or cowplot object.
#'
#' @examples
#' f <- function(x) { return(0.5 * x[,1] + 3 * x[,2] * x[,3]) - 5*x[,4] }
#' g <- function(x) { return(0.6 * (x[,1]-0.3) + 1 * x[,2] * x[,3]) - 5*x[,4] }
#' sigma <- 0.2
#' n <- 100
#' x <- matrix(2 * runif(n * 3) -1, ncol = 3)
#' x <- data.frame(x)
#' x[,4] <- rbinom(100, 1, 0.3)
#' colnames(x) <- c('rob', 'hugh', 'ed', 'phil')
#' Ey <- f(x)
#' y <- rnorm(n, Ey, sigma)
#' Ez <- g(x)
#' z <- rnorm(n, Ez, sigma)
#' df <- data.frame(z, y, x)
#' set.seed(99)
#'
#' bartFit1 <- bart(y ~ rob + hugh + ed + phil, df,
#' keepevery = 10, ntree = 100, keeptrees = TRUE)
#'
#' bartFit2 <- bart(z ~ rob + hugh + ed + phil, df,
#' keepevery = 10, ntree = 100, keeptrees = TRUE)
#'
#' model.list <- list(bartFit1, bartFit2)
#'
#' multipartial(modl = model.list, spnames = c('Y','Z'),
#' x.vars = c('rob','hugh'),
#' smooth = 7, trace = TRUE, panels = TRUE)
#'
#' @export
#'
#'

multipartial <- function(modl, spnames, x.vars = NULL,
smooth = 7, trace = TRUE,
transform = TRUE, panels = FALSE) {

# A couple errors in case I'm Idiot

if(smooth>10) {
warning("You have chosen way, way too much smoothing... poorly")
}

if(!is.null(x.vars) && length(x.vars)==1 && panels==TRUE) {
stop("Hey bud, you can't do several panels on only one variable!")
}

# Actually build each partial

pd.get <- function(model) {
if (is.null(x.vars)) { raw <- model$fit$data@x} else { raw <- model$fit$data@x[,x.vars]}

if(!is.null(x.vars) && length(x.vars)==1) {
minmax <- data.frame(mins = min(raw),
maxs = max(raw)) } else {
minmax <- data.frame(mins = apply(raw, 2, min),
maxs = apply(raw, 2, max))
}
lev <- lapply(c(1:nrow(minmax)), function(i) {seq(minmax$mins[i], minmax$maxs[i], (minmax$maxs[i]-minmax$mins[i])/(10*smooth))})

for(i in 1:length(lev)){
if(length(lev)==1) {
if(length(unique(raw))==2) { lev[[i]] <- unique(raw) }
} else {
if(length(unique(raw[,i]))==2) { lev[[i]] <- unique(raw[,i])}
}
}

pd <- pdbart(model, xind = x.vars, levs = lev, pl=FALSE)
}

pdl <- lapply(modl, pd.get)

# Generate the partial visualizations in ggplot


plots <- list()

for (i in 1:length(pdl[[1]]$fd)) {

for (j in 1:length(pdl)) {
q50f <- function(x) {apply(x$fd[[i]], 2, median)}
q50 <- data.frame(lapply(pdl, q50f))
colnames(q50) <- spnames
if(transform==TRUE) {q50 <- apply(q50, 2, pnorm)}
q50 %>% as_tibble() %>% mutate(x = pdl[[1]]$levs[[i]]) -> df
}

df %>% pivot_longer(-x) %>% rename(Species = name) -> df

if(trace==TRUE) {

ff <- function(x) {data.frame(t(x$fd[[i]])) }
f <- lapply(pdl, ff)
traces <- lapply(c(1:length(pdl)), function(k) {
colnames(f[[k]]) <- gsub('X', 'iter', colnames(f[[k]]))
df %>% filter(Species == spnames[k]) %>% pull(x) -> x.i
f[[k]]$x <- x.i
f[[k]]$Species <- spnames[k]
f[[k]]
})
traces <- do.call("rbind", traces)
df <- left_join(df, traces)

}

#if(transform==TRUE) {df %>% mutate(value = pnorm(value)) -> df}

g <- ggplot(df,aes(x=x, y = value, colour = Species)) +
labs(title=pdl[[1]]$xlbs[[i]], y='', x='') + theme_light(base_size = 16) +
theme(plot.title = element_text(hjust = 0.5),
axis.title.y = element_text(vjust=1.7))

if(trace==TRUE) {
if(transform==TRUE) {
for(j in 3:ncol(df)) {
g <- g + geom_line(aes_string(y=pnorm(pull(df[,j]))), alpha = 0.025)
}
} else {
for(j in 3:ncol(df)) {
g <- g + geom_line(aes_string(y=pull(df[,j])), alpha = 0.025)
}
}
}

g <- g + geom_line(size=1.25)

if(panels==FALSE) {g <- g + theme(plot.margin=unit(c(0.5,0.5,0.5,0.5),"cm"))} else {
g <- g + theme(plot.margin=unit(c(0.15,0.15,0.15,0.15),"cm"))
}
plots[[i]] <- g

}

# Return them

if(panels==TRUE) {#print(cowplot::plot_grid(plotlist=plots))
return(wrap_plots(plotlist=plots, guides = 'collect'))
} else {
return(plots)
}

}

12 changes: 10 additions & 2 deletions R/partials.R
Original file line number Diff line number Diff line change
@@ -62,7 +62,15 @@ partial <- function(model, x.vars=NULL, equal=TRUE, smooth=1,
# This is for something else ultimately: attr(bartFit$fit$data@x, "term.labels")
# This is where equal happens

if (is.null(x.vars)) { raw <- model$fit$data@x} else { raw <- model$fit$data@x[,x.vars]}

if(class(model)=='rbart') {
fitobj <- model$fit[[1]]
}
if(class(model)=='bart') {
fitobj <- model$fit
}

if (is.null(x.vars)) { raw <- fitobj$data@x} else { raw <- fitobj$data@x[,x.vars]}

if(equal==TRUE) {
if(!is.null(x.vars) && length(x.vars)==1) {
@@ -146,7 +154,7 @@ for (i in 1:length(pd$fd)) {
theme(plot.title = element_text(hjust = 0.5),
axis.title.y = element_text(vjust=1.7))

if(ci==TRUE) {alpha2 <- 0.05; k <- 4} else {alpha2 <- 0.025*(model$fit$control@n.trees/200); k <- 2}
if(ci==TRUE) {alpha2 <- 0.05; k <- 4} else {alpha2 <- 0.025*(fitobj$control@n.trees/200); k <- 2}
if(trace==TRUE) {
if(transform==TRUE) {
for(j in 1:nrow(pd$fd[[i]])) {
53 changes: 0 additions & 53 deletions R/plot_mcmc.R
Original file line number Diff line number Diff line change
@@ -10,60 +10,7 @@
#' @param wait Adds a Sys.sleep after the plots; because the plots are two-panel they can be a little delayed sometimes, which isn't super visually smooth. However, if you want something that's particularly crisp, I'd suggest using the animation package (see below example).
#' @param quiet Turns off progress bars if TRUE
#'
#' @examples
#' ### Setup
#' set.seed(42)
#' onelandscape <- function(x) {NLMR::nlm_gaussianfield(nrow = 150,
#' ncol = 150,
#' rescale = FALSE)}
#' climate <- stack(lapply(c(1:8), onelandscape))
#' xnames <- c('x1','x2','x3','x4','x5','x6','x7','x8')
#' names(climate) <- xnames
#'
#' plot(climate[[1]],main='An imaginary variable')
#' random.sp <- generateRandomSp(climate[[1:4]],
#' # ^ These are the informative predictors
#' approach="pca",
#' relations='gaussian',
#' species.prevalence=0.5,
#' realistic.sp = TRUE,
#' PA.method='threshold')
#' sp.points <- sampleOccurrences(random.sp,
#' n=250,
#' type = 'presence-absence',
#' detection.probability = 0.9)
#' occ <- SpatialPoints(sp.points$sample.points[,c('x','y')])
#' occ.df <- cbind(sp.points$sample.points,
#' raster::extract(climate, occ))
#' occ.df <- occ.df[,-c(1:3)]
#'
#' ### The actual example
#'
#' sdm <- bart(y.train=occ.df[,'Observed'],
#' x.train=occ.df[,xnames],
#' keeptrees = TRUE)
#'
#' plot.mcmc(sdm, climate, iter=50)
#'
#' sdm <- bart(y.train=occ.df[,'Observed'],
#' x.train=occ.df[,xnames],
#' keeptrees = TRUE,
#' nskip=0,
#' ntree=10)
#'
#' plot.mcmc(sdm, climate, iter=50)
#'
#' ###If you want to animate it:
#'
#' library(animation)
#'
#' saveGIF(plot.mcmc(sdm, climate, iter=50), movie.name = "Timelapse.gif", interval = 0.15,
#' ani.width = 800, ani.height = 400)
#'
#' @export
#'
#'
#'

plot.mcmc <- function(object, inputstack, iter=100, wait=0.1, quiet=FALSE) {

2 changes: 1 addition & 1 deletion R/plot_ri.R
Original file line number Diff line number Diff line change
@@ -37,7 +37,7 @@ plot.ri <- function(model, temporal=TRUE) {
panel.grid.minor = element_blank()) -> p
} else {

modelH.ri$ranef %>% data.frame() %>% gather() %>%
model$ranef %>% data.frame() %>% gather() %>%
group_by(key) %>%
summarise(mean = mean(value),
upper = quantile(value, 0.975, na.rm = TRUE),
Loading