Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
Vianey Leos Barajas authored Jun 20, 2020
1 parent 7a09c08 commit 2a868df
Show file tree
Hide file tree
Showing 9 changed files with 281 additions and 10 deletions.
20 changes: 20 additions & 0 deletions StanCourseVLB/logregmodel2.stan
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// The input data is a vector 'y' of length 'TT'.
data {
int<lower=1> TT;
int y[TT];

int ncov;
matrix[TT, ncov + 1] x;
}

parameters {
//real<lower=0, upper=1> p;
vector[ncov + 1] beta;
}

model {
//p ~ uniform(0, 1);
beta ~ normal(0, 0.5);
y ~ bernoulli_logit(x*beta);
}

Binary file added StanCourseVLB/logregmodel3.rds
Binary file not shown.
30 changes: 30 additions & 0 deletions StanCourseVLB/logregmodel3.stan
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// The input data is a vector 'y' of length 'TT'.
data {
int<lower=1> TT;
int y[TT];

int ncov;
matrix[TT, ncov] x;

vector[TT] bsize;
vector[TT] sex;
}

parameters {
//real<lower=0, upper=1> p;
vector[ncov] beta;
vector[3] alpha;
}

model {

// Priors
beta ~ normal(0, 0.5);
alpha[1] ~ normal(0, 0.5);
alpha[2] ~ normal(0, 0.5);
//values range from -40 to 40
alpha[3] ~ normal(0, 0.1);

y ~ bernoulli_logit(alpha[1] + alpha[2]*sex + alpha[3]*bsize + x*beta);
}

Binary file added StanCourseVLB/logregmodel4.rds
Binary file not shown.
48 changes: 48 additions & 0 deletions StanCourseVLB/logregmodel4.stan
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// The input data is a vector 'y' of length 'TT'.
data {
int<lower=1> TT;
int y[TT];

int sexmissing[TT];
int ncov;
matrix[TT, ncov] x;

vector[TT] bsize;
vector[TT] sex;
}

parameters {
//real<lower=0, upper=1> p;
vector[ncov] beta;
vector[3] alpha;
real<lower=0, upper=1> pi;
}

model {

// Priors
beta ~ normal(0, 0.5);
alpha[1] ~ normal(0, 0.5);
alpha[2] ~ normal(0, 0.5);
//values range from -40 to 40
alpha[3] ~ normal(0, 0.1);

for(t in 1:TT){
if(sexmissing[t] == 1){
target += log_mix(pi,
bernoulli_logit_lpmf(y[t] | alpha[1] +
alpha[3]*bsize +
x[t]*beta),
bernoulli_logit_lpmf(y[t] | alpha[1] +
alpha[2] +
alpha[3]*bsize[t] +
x[t]*beta));
} else {
y[t] ~ bernoulli_logit(alpha[1] +
alpha[2]*sex[t] +
alpha[3]*bsize[t] +
x[t]*beta);
}
}
}

Binary file added StanCourseVLB/logregmodel5.rds
Binary file not shown.
47 changes: 47 additions & 0 deletions StanCourseVLB/logregmodel5.stan
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// The input data is a vector 'y' of length 'TT'.
data {
int<lower=1> TT;
int y[TT];

int ncov;
matrix[TT, ncov] x;

vector[TT] bsize;
vector[TT] sex;

int nsharks;
int sharkid[TT];
}

parameters {
//real<lower=0, upper=1> p;
vector[ncov] beta[nsharks];
vector[3] alpha;

real mu;
real<lower=0> stdev;

}

model {

// Priors
mu ~ normal(0, 0.1);
stdev ~ normal(0, 0.1); // by default, this is the half-normal (because stdev > 0)


alpha[1] ~ normal(0, 0.5);
alpha[2] ~ normal(0, 0.5);
//values range from -40 to 40
alpha[3] ~ normal(0, 0.1);

for(n in 1:nsharks){
beta[n] ~ normal(mu, stdev);
}

for(t in 1:TT){
y[t] ~ bernoulli_logit(alpha[1] + alpha[2]*sex[t] + alpha[3]*bsize[t] + x[t]*beta[sharkid[t]]);
}

}

136 changes: 131 additions & 5 deletions StanCourseVLB/runningstancode_vlb1.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@ knitr::opts_chunk$set(echo = TRUE)
library(rstan)
library(ggplot2)
library(tidyverse)
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)
```

## Simulating Data

```{r, simulatedata}
```{r, simulatedata, warning=FALSE, message=FALSE}
set.seed(17)
Expand Down Expand Up @@ -46,19 +48,143 @@ for(n in 1:20){
}
sharkfun <- bind_rows(y)
sharkfun <- dplyr::bind_rows(y)
```


## Running the Stan models

```{r}
data1 <- list(TT = dim(sharkfun)[1],
y= sharkfun$Presence)
fit1 <- stan(file = "logregmodel1.stan", data=data1)
fit1
```


```{r}
data2 <- list(TT = dim(sharkfun)[1],
y=sharkfun$Presence,
ncov = 2,
x = cbind(1, sharkfun$timecos, sharkfun$timesin))
fit2 <- stan(file = "logregmodel2.stan", data=data2)
```


```{r}
data1 <- list(y= sharkfun$Presence)
fit1 <- stan(file = "logregmodel1.stan", data=data1)
fit1
data3 <- list(TT = dim(sharkfun)[1],
y=sharkfun$Presence,
ncov = 2,
x = cbind(sharkfun$timecos, sharkfun$timesin),
bsize = sharkfun$Size - 220,
sex = sharkfun$Sex)
fit3 <- stan(file = "logregmodel3.stan", data=data3)
```

```{r}
data4 <- list(TT = dim(sharkfun)[1],
y=sharkfun$Presence,
sexmissing = as.numeric(sharkfun$Shark == "Shark 3"),
ncov = 2,
x = cbind(sharkfun$timecos, sharkfun$timesin),
bsize = sharkfun$Size - 220,
sex = sharkfun$Sex)
fit4 <- stan(file = "logregmodel4.stan", data=data4)
```


```{r}
data5 <- list(TT = dim(sharkfun)[1],
y=sharkfun$Presence,
ncov = 2,
x = cbind(sharkfun$timecos, sharkfun$timesin),
bsize = sharkfun$Size - 220,
sex = sharkfun$Sex,
nsharks = 20,
sharkid = rep(1:20, each=365))
fit5 <- stan(file="logregmodel5.stan", data=data5)
```


## projpred

```{r}
library(rstanarm)
library(projpred)
library(ggplot2)
library(bayesplot)
theme_set(theme_classic())
n <- 7300
D <- 5
p0 <- 2 # prior guess for the number of relevant variables
tau0 <- p0/(D-p0) * 1/sqrt(n)
# regularized horseshoe prior
prior_coeff <- hs(global_scale = tau0, slab_scale = 1)
#----------------------------------------------------------------------
## FITTING THE MODELS
logregcov.horseshoe <- stan_glm(formula = Presence~.,
family = binomial(link="logit"),
data = sharkfun%>%dplyr::select(-Shark),
prior = hs(global_scale = 0.007802743,
slab_scale = 1))
logregcov.defaultpriors <- stan_glm(formula = Presence~.,
family = binomial(link="logit"),
data = sharkfun%>%dplyr::select(-Shark))
logregcov.n01 <- stan_glm(formula = Presence~.,
family = binomial(link="logit"),
data = sharkfun%>%dplyr::select(-Shark),
prior = normal(0,1))
#----------------------------------------------------------------------
## K-FOLD CROSS-VALIDATION
cvs.horseshoe <- cv_varsel(logregcov.horseshoe,
method='forward',
cv_method='kfold', K=5)
cvplot.horseshoe <- varsel_plot(cvs.horseshoe,
stats=c('elpd, acc')) +
theme_minimal() + theme(text=element_text(size=15),
legend.position = "none") +
ggtitle("Classification Accuracy (5-fold CV)")
cvplot.horseshoe
suggest_size(cvs)
vs <- varsel(logregcov.horseshoe, method='forward')
vs$vind
varsel_plot(vs, stats=c('elpd', 'acc'), deltas=F)
#----------
cvs.horseshoe$vind
```






10 changes: 5 additions & 5 deletions StanCourseVLB/stanhour3.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ model{
x[t]*beta));
} else {
y[t] ~ bernoulli_logit(alpha[1] +
alpha[2]*sex[t]
alpha[2]*sex[t] +
alpha[3]*bsize[t] +
x[t]*beta);
}
Expand Down Expand Up @@ -385,13 +385,13 @@ Changes: have an index for individual shark and $\beta_1$ is now a hierarchical
```{stan, output.var = "ex4", eval=F}
data{
...
int no.sharks;
int nsharks;
vector[TT] sharkid;
}
parameters{
...
vector[no.sharks] beta;
vector[nsharks] beta;
real betamu;
real<lower=0> betasig;
}
Expand Down Expand Up @@ -466,7 +466,7 @@ https://mc-stan.org/projpred/articles/quickstart.html
Back to the fixed effects for a minute:

```{r, eval=F}
n <- 4000
n <- 7300
D <- 5
p0 <- 2 # prior guess for the number of relevant variables
tau0 <- p0/(D-p0) * 1/sqrt(n)
Expand All @@ -477,7 +477,7 @@ prior_coeff <- hs(global_scale = tau0, slab_scale = 1)
logregcov.horseshoe <- stan_glm(formula = Presence~.,
family = binomial(link="logit"),
data = sharkfun,
prior = hs(global_scale = 0.03049858,
prior = hs(global_scale = 0.007802743,
slab_scale = 1))
logregcov.defaultpriors <- stan_glm(formula = Presence~.,
Expand Down

0 comments on commit 2a868df

Please sign in to comment.