Skip to content

Commit

Permalink
minor edits to both vignettes, rebuilt them R CMD build style
Browse files Browse the repository at this point in the history
  • Loading branch information
pboesu committed Jun 9, 2016
1 parent 2d4ee03 commit 6dadb48
Show file tree
Hide file tree
Showing 12 changed files with 189 additions and 376 deletions.
34 changes: 29 additions & 5 deletions inst/doc/chytrid_dede_example.R
Original file line number Diff line number Diff line change
Expand Up @@ -113,27 +113,51 @@ iter = 500
# inference call
dede_rev <- de_mcmc(N = iter, data=chytrid, de.model=CSZ.dede,
obs.model=chytrid_obs_model, all.params=mcmc.pars,
Tmax = max(chytrid$time), data.times=c(0,chytrid$time), cnt=iter %/% 10,
Tmax = max(chytrid$time), data.times=c(0,chytrid$time), cnt=50,
plot=FALSE, sizestep=0.1, solver="dede", verbose = TRUE)

## ----message=FALSE, warning=FALSE,fig.width = 8, fig.height = 8----------
par(mfrow = c(3,4))
plot(dede_rev, ask=FALSE, auto.layout=FALSE)

## ---- fig.width = 8, fig.height = 8--------------------------------------
burnin = 50
burnin = 100
pairs(dede_rev, burnin = burnin, scatter=TRUE, trend=TRUE)
post_prior_densplot(dede_rev, burnin = burnin)
summary(dede_rev)

## ------------------------------------------------------------------------
par(mfrow=c(2,3), mgp=c(2.2, 0.8, 0))
#define a fancy y axis label
ylabel = expression(paste(Pr,"(", theta,"|", "Y", ")"))
#plot the individual parameters
post_prior_densplot(dede_rev, param="sr",xlab=expression(theta),
ylab=ylabel, show.obs=FALSE, xlim=c(0,8),
main=expression(paste("s",phantom()[{paste("r")}])))
legend("topright", legend=c("Posterior","Prior"), lty = 1, col = c("black", "red"))
post_prior_densplot(dede_rev, param="fs",xlab=expression(theta),
ylab=ylabel, show.obs=FALSE, xlim=c(-0.1,1.1),
main=expression(paste("f",phantom()[{paste("s")}])))
post_prior_densplot(dede_rev, param="ds",xlab=expression(theta),
ylab=ylabel, show.obs=FALSE, xlim=c(0,3),
main=expression(paste("d",phantom()[{paste("s")}])))
post_prior_densplot(dede_rev, param="muz",xlab=expression(theta),
ylab=ylabel, show.obs=FALSE, xlim=c(0,6),
main=expression(paste(mu,phantom()[{paste("Z")}])))
post_prior_densplot(dede_rev, param="eta",xlab=expression(theta),
ylab=ylabel, show.obs=FALSE, xlim=c(0,50), ylim=c(0,0.2),
main=expression(eta))
post_prior_densplot(dede_rev, param="Tmin",xlab=expression(theta),
ylab=ylabel, show.obs=FALSE, xlim=c(1.5,6.5),
main=expression(paste("T",phantom()[{paste("min")}])))

## ----post-sims-----------------------------------------------------------
post_traj <- post_sim(dede_rev, n=20, times=0:100, burnin=burnin, output = 'all')
post_traj <- post_sim(dede_rev, n=100, times=0:100, burnin=burnin, output = 'all', prob = 0.95)

## ----post-sims-plot, fig.width = 8, fig.height = 3-----------------------
#median and HDI
par(mfrow=c(1,3))
plot(post_traj, plot.type = "medianHDI", auto.layout = FALSE)
legend("topright", legend=c("posterior median", "95% HDI"), lty=1, col=c("red","grey"))
legend("topright", legend=c("posterior median", "95% HDI"), lty=1, col=c("red","grey"), bty='n')


## ----post-sims-ensemble, fig.width = 8, fig.height = 6-------------------
Expand Down
66 changes: 50 additions & 16 deletions inst/doc/chytrid_dede_example.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ author: "Philipp H Boersch-Supan and Leah R Johnson"
date: '`r format(Sys.Date(), "%B %d, %Y")`'
output:
pdf_document:
keep_tex: yes
number_sections: yes
bibliography: debinfer.bib
vignette: >
Expand All @@ -15,7 +14,9 @@ vignette: >

#Preliminaries

This examples assumes that `deBInfer` is installed and loaded. If this is not the case it needs to be installed from github, which requires the devtools package.
This vignette illustrates the steps needed to perform inference for a DDE model of population growth in a fungal pathogen. A detailed description of the rationale behind the Bayesian inference approach for differential equations can be found in the preprint describing the `deBInfer` package [@deBInfer].

This example assumes that `deBInfer` is installed and loaded. If this is not the case it needs to be installed from github, which requires the devtools package [@devtools].
```{r install1, eval=FALSE}
install.packages("devtools")
```
Expand Down Expand Up @@ -107,9 +108,7 @@ The log-likelihood of the data given the parameters, underlying model, and initi
\ell(\mathbf{Z}|\mathbf{\theta})=\sum\limits^n_{t}Z_t \text{ log }\lambda-n\lambda
\end{equation}

**Explain epsilon correction**

This can be translated into an observation model function for `deBInfer`. The observation model function must have three named arguments `data`, `sim.data`, and `samp`, as these are used by the MCMC procedure to pass in the data (as a `data.frame`), the current state of the Markov chain (as a named vector), and the associated DE model solution (as a matrix-like object of class `deSolve`). We can access these inputs to define the data likelihood. In this case we have repeat measurements for each time point, so we iterate over the unique timepoints in `data$time`, and then calculate the sum log-likelihood over all matching `data$count` observations using the current value of the state variable $Z$ from the DE model solution at this point in the Markov chain.
This can be translated into an observation model function for `deBInfer`. The observation model function must have three named arguments `data`, `sim.data`, and `samp`, as these are used by the MCMC procedure to pass in the data (as a `data.frame`, i.e. indexed using the `$` operator and the column names of the input data), the current state of the Markov chain (as a named vector), and the associated DE model solution (as a matrix-like object of class `deSolve`, i.e. indexed using the `[ ]` operator and the declared names of the state variables). We can access these inputs to define the data likelihood. In this case we have repeat measurements for each time point, so we iterate over the unique timepoints in `data$time`, and then calculate the sum log-likelihood over all matching `data$count` observations using the current value of the state variable $Z$ from the DE model solution at this point in the Markov chain.

```{r obs-model}
# observation model
Expand All @@ -126,8 +125,11 @@ chytrid_obs_model<-function(data, sim.data, samp){
return(llik)
}
```
Following Johnson et al. [-@johnson2013bayesian] we employ a small correction `ec` that is needed because the DE solution can equal zero, whereas the parameter `lambda` of the Poisson likelihood must be strictly positive.


## Parameter declaration

We continue by defining the parameters for inference
```{r vars}
sr <- debinfer_par(name = "sr", var.type = "de", fixed = FALSE,
Expand Down Expand Up @@ -161,52 +163,84 @@ S <- debinfer_par(name = "S", var.type = "init", fixed = TRUE, value = 0)
Z <- debinfer_par(name = "Z", var.type = "init", fixed = TRUE, value = 0)
```

**Explain importance of init order**
## MCMC Inference

The declared parameters are then collated using the `setup_debinfer` function. Note that **the initial values must be entered in the same order, as they are specified in the DE model function**, as the solver matches these values by position, rather than by name. More details can be found in `?deSolve::dede`. The remaining parameters can be entered in any order.

```{r mcmc-setup}
# ----setup---------------------------------------------------------------
mcmc.pars <- setup_debinfer(sr, fs, ds, muz, eta, Tmin, C, S, Z)
```

And then do the actual inference
`de_mcmc` is the workhorse of the package and runs the MCMC estimation. The progress of the MCMC procedure can be monitored using the `cnt`, `plot` and `verbose` options: Every `cnt` iterations the function will print out information about the current state, and, if `plot=TRUE`, traceplots of the chains will be plotted. Setting `verbose=TRUE` will print additional information. Note that frequent plotting will substantially slow down the MCMC sampler, and should be used only on short runs when tuning the sampler.
```{r de_mcmc, results='hide'}
# do inference with deBInfer
# MCMC iterations
iter = 500
# inference call
dede_rev <- de_mcmc(N = iter, data=chytrid, de.model=CSZ.dede,
obs.model=chytrid_obs_model, all.params=mcmc.pars,
Tmax = max(chytrid$time), data.times=c(0,chytrid$time), cnt=iter %/% 10,
Tmax = max(chytrid$time), data.times=c(0,chytrid$time), cnt=50,
plot=FALSE, sizestep=0.1, solver="dede", verbose = TRUE)
```
Note that the number of iterations was set to 500 to keep the build time of the vignette within acceptable limits. For the inference results reported in Boersch-Supan and Johnson [-@deBInfer] the MCMC procedure was run for 100000 iterations, which took about 4 hours on a 2014 Apple Mac mini with a 2.6 GHz Intel i5 processor.

## MCMC diagnostics

We plot and summarize the MCMC chains
```{r message=FALSE, warning=FALSE,fig.width = 8, fig.height = 8}
par(mfrow = c(3,4))
plot(dede_rev, ask=FALSE, auto.layout=FALSE)
```
From the traceplot we can see that the burnin period is **about 200 samples**. We can remove the burnin and have a look at parameter correlations, and the overlap between the posterior and prior densities.
From the traceplot we can see that with only 500 iterations the chains have neither mixed well, nor reached stationarity. For demonstration purposes we remove a burnin period of 100 samples and have a look at parameter correlations, and the overlap between the posterior and prior densities.
```{r, fig.width = 8, fig.height = 8}
burnin = 50
burnin = 100
pairs(dede_rev, burnin = burnin, scatter=TRUE, trend=TRUE)
post_prior_densplot(dede_rev, burnin = burnin)
summary(dede_rev)
```
We simulate DE model trajectories from the posterior and calculate the HPD interval for the deterministic part of the model.
More control over the `post_prior_densplot` can be achieved by plotting the parameters individually, using the `param` option. This way the x and y limits of the plots can be adjusted to show a larger portion of the prior support, and fancy labels can be added.
```{r}
par(mfrow=c(2,3), mgp=c(2.2, 0.8, 0))
#define a fancy y axis label
ylabel = expression(paste(Pr,"(", theta,"|", "Y", ")"))
#plot the individual parameters
post_prior_densplot(dede_rev, param="sr",xlab=expression(theta),
ylab=ylabel, show.obs=FALSE, xlim=c(0,8),
main=expression(paste("s",phantom()[{paste("r")}])))
legend("topright", legend=c("Posterior","Prior"), lty = 1, col = c("black", "red"))
post_prior_densplot(dede_rev, param="fs",xlab=expression(theta),
ylab=ylabel, show.obs=FALSE, xlim=c(-0.1,1.1),
main=expression(paste("f",phantom()[{paste("s")}])))
post_prior_densplot(dede_rev, param="ds",xlab=expression(theta),
ylab=ylabel, show.obs=FALSE, xlim=c(0,3),
main=expression(paste("d",phantom()[{paste("s")}])))
post_prior_densplot(dede_rev, param="muz",xlab=expression(theta),
ylab=ylabel, show.obs=FALSE, xlim=c(0,6),
main=expression(paste(mu,phantom()[{paste("Z")}])))
post_prior_densplot(dede_rev, param="eta",xlab=expression(theta),
ylab=ylabel, show.obs=FALSE, xlim=c(0,50), ylim=c(0,0.2),
main=expression(eta))
post_prior_densplot(dede_rev, param="Tmin",xlab=expression(theta),
ylab=ylabel, show.obs=FALSE, xlim=c(1.5,6.5),
main=expression(paste("T",phantom()[{paste("min")}])))
```

## Simulating posterior trajectories
We simulate 100 DE model trajectories from the posterior and calculate the 95% highest posterior density interval for the deterministic part of the model.

```{r post-sims}
post_traj <- post_sim(dede_rev, n=20, times=0:100, burnin=burnin, output = 'all')
post_traj <- post_sim(dede_rev, n=100, times=0:100, burnin=burnin, output = 'all', prob = 0.95)
```

We can visualise the median posterior trajectory and the associated highest posterior density interval using `r plot.type="medianHDI"`
We can visualise the median posterior trajectory and the highest posterior density interval using `r plot.type="medianHDI"`
```{r post-sims-plot, fig.width = 8, fig.height = 3}
#median and HDI
par(mfrow=c(1,3))
plot(post_traj, plot.type = "medianHDI", auto.layout = FALSE)
legend("topright", legend=c("posterior median", "95% HDI"), lty=1, col=c("red","grey"))
legend("topright", legend=c("posterior median", "95% HDI"), lty=1, col=c("red","grey"), bty='n')
```
Alternatively we can plot an ensemble of posterior trajectories
Alternatively we can plot an ensemble of posterior trajectories using `r plot.type="ensemble"`
```{r post-sims-ensemble, fig.width = 8, fig.height = 6}
plot(post_traj, plot.type = "ensemble", col = "#FF000040")
```
Expand Down
Binary file modified inst/doc/chytrid_dede_example.pdf
Binary file not shown.
56 changes: 30 additions & 26 deletions inst/doc/logistic_ode_example.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@ library(deBInfer)
## ----ode-def, message=FALSE,warning=FALSE--------------------------------
library(deSolve)
logistic_model <- function (time, y, parms) {
with(as.list(c(y, parms)), {
dN <- r * N * (1 - N / K)
list(dN)
})
with(as.list(c(y, parms)), {
dN <- r * N * (1 - N / K)
list(dN)
})
}

## ----pars-def------------------------------------------------------------
y <- c(N = 0.1)
parms <- c(r = 0.1, K = 10)
times <- seq(0, 120, 1)
Expand All @@ -29,7 +31,8 @@ plot(out)

## ------------------------------------------------------------------------
set.seed(143)
N_obs <- as.data.frame(out[c(1,runif(35, 0, nrow(out))),]) #force include the first time-point (t=0)
#force include the first time-point (t=0)
N_obs <- as.data.frame(out[c(1,runif(35, 0, nrow(out))),])


## ------------------------------------------------------------------------
Expand All @@ -38,40 +41,32 @@ N_obs <- as.data.frame(out[c(1,runif(35, 0, nrow(out))),]) #force include the fi
N_obs$N_noisy <- rlnorm(nrow(N_obs), log(N_obs$N),parms['sdlog.N'])
#observations must be ordered for solver to work
N_obs <- N_obs[order(N_obs$time),]

#Plot true and noisy observations
plot(N_obs$time, N_obs$N, ylim=c(0, max(N_obs$N,N_obs$N_noisy)))
points(N_obs$time, N_obs$N_noisy, col="red")

out_obs <- ode(y, c(0,N_obs$time), logistic_model, parms, method='lsoda')
plot(out_obs, type="p")
lines(out)

## ----obs-model-----------------------------------------------------------
# the observation model
logistic_obs_model<-function(data, sim.data, samp){

llik.N<-sum(dlnorm(data$N_noisy, meanlog=log(sim.data[,"N"] + 1e-6),
sdlog=samp[['sdlog.N']], log=TRUE)
)

llik<-llik.N

return(llik)
sdlog=samp[['sdlog.N']], log=TRUE))
return(llik.N)
}


## ----pars, results="hide", message=FALSE---------------------------------
library(deBInfer)
r <- debinfer_par(name = "r", var.type = "de", fixed = FALSE,
value = 0.5, prior="norm", hypers=list(mean = 0, sd = 1),
prop.var=0.005, samp.type="rw")
prop.var=0.0001, samp.type="rw")

K <- debinfer_par(name = "K", var.type = "de", fixed = FALSE,
value = 5, prior="lnorm", hypers=list(meanlog = 1, sdlog = 1),
prop.var=0.1, samp.type="rw")

sdlog.N <- debinfer_par(name = "sdlog.N", var.type = "obs", fixed = FALSE,
value = 0.1, prior="lnorm", hypers=list(meanlog = 0, sdlog = 1),
value = 0.05, prior="lnorm", hypers=list(meanlog = 0, sdlog = 1),
prop.var=c(3,4), samp.type="rw-unif")

## ----inits---------------------------------------------------------------
Expand All @@ -87,26 +82,35 @@ mcmc.pars <- setup_debinfer(r, K, sdlog.N, N)
# inference call
mcmc_samples <- de_mcmc(N = iter, data=N_obs, de.model=logistic_model,
obs.model=logistic_obs_model, all.params=mcmc.pars,
Tmax = max(N_obs$time), data.times=N_obs$time, cnt=iter+1,
plot=FALSE, sizestep=0.1, solver="ode")
Tmax = max(N_obs$time), data.times=N_obs$time, cnt=500,
plot=FALSE, solver="ode")


## ----message=FALSE, warning=FALSE,fig.width = 8, fig.height = 8----------
plot(mcmc_samples)

## ------------------------------------------------------------------------
burnin = 200
burnin = 250
pairs(mcmc_samples, burnin = burnin, scatter=TRUE, trend=TRUE)
post_prior_densplot(mcmc_samples, burnin = burnin)
summary(mcmc_samples)

## ------------------------------------------------------------------------
par(mfrow=c(1,3))
post_prior_densplot(mcmc_samples, burnin = burnin, param="r")
abline(v=0.1, col="red", lty=2)
post_prior_densplot(mcmc_samples, burnin = burnin, param="K")
abline(v=10, col="red", lty=2)
post_prior_densplot(mcmc_samples, burnin = burnin, param="sdlog.N")
abline(v=0.05, col="red", lty=2)

## ----post-sims-----------------------------------------------------------
post_traj <- post_sim(mcmc_samples, n=200, times=0:100, burnin=burnin, output = 'all')
post_traj <- post_sim(mcmc_samples, n=500, times=0:100, burnin=burnin, output = 'all', prob = 0.95)

## ----post-sims-plot------------------------------------------------------
#median and HDI
plot(post_traj, plot.type = "medianHDI")
legend("topleft", legend=c("posterior median", "95% HDI"), lty=1, col=c("red","grey"))
plot(post_traj, plot.type = "medianHDI", lty = c(2,1), lwd=3, col=c("red","grey20"),
panel.first=lines(out, col="darkblue", lwd=2))
legend("topleft", legend=c("posterior median", "95% HDI", "true model"),
lty=c(2,1,1), lwd=c(3,2,2), col=c("red","grey20","darkblue"), bty='n')


## ----post-sims-ensemble--------------------------------------------------
Expand Down
Loading

0 comments on commit 6dadb48

Please sign in to comment.