Add files via upload

aammd · Jun 20, 2020 · 2a868df · 2a868df
1 parent 7a09c08
commit 2a868df
Show file tree

Hide file tree

Showing 9 changed files with 281 additions and 10 deletions.
diff --git a/StanCourseVLB/logregmodel2.stan b/StanCourseVLB/logregmodel2.stan
@@ -0,0 +1,20 @@
+// The input data is a vector 'y' of length 'TT'.
+data {
+  int<lower=1> TT;
+  int y[TT];
+
+  int ncov;
+  matrix[TT, ncov + 1] x;
+}
+
+parameters {
+  //real<lower=0, upper=1> p;
+  vector[ncov + 1] beta;
+}
+
+model {
+  //p ~ uniform(0, 1);
+  beta ~ normal(0, 0.5);
+  y ~ bernoulli_logit(x*beta);
+}
+
diff --git a/StanCourseVLB/logregmodel3.rds b/StanCourseVLB/logregmodel3.rds
diff --git a/StanCourseVLB/logregmodel3.stan b/StanCourseVLB/logregmodel3.stan
@@ -0,0 +1,30 @@
+// The input data is a vector 'y' of length 'TT'.
+data {
+  int<lower=1> TT;
+  int y[TT];
+
+  int ncov;
+  matrix[TT, ncov] x;
+
+  vector[TT] bsize;
+  vector[TT] sex;
+}
+
+parameters {
+  //real<lower=0, upper=1> p;
+  vector[ncov] beta;
+  vector[3] alpha;
+}
+
+model {
+
+  // Priors
+  beta ~ normal(0, 0.5);
+  alpha[1] ~ normal(0, 0.5);
+  alpha[2] ~ normal(0, 0.5);
+  //values range from -40 to 40
+  alpha[3] ~ normal(0, 0.1);
+
+  y ~ bernoulli_logit(alpha[1] + alpha[2]*sex + alpha[3]*bsize + x*beta);
+}
+
diff --git a/StanCourseVLB/logregmodel4.rds b/StanCourseVLB/logregmodel4.rds
diff --git a/StanCourseVLB/logregmodel4.stan b/StanCourseVLB/logregmodel4.stan
@@ -0,0 +1,48 @@
+// The input data is a vector 'y' of length 'TT'.
+data {
+  int<lower=1> TT;
+  int y[TT];
+
+  int sexmissing[TT];
+  int ncov;
+  matrix[TT, ncov] x;
+
+  vector[TT] bsize;
+  vector[TT] sex;
+}
+
+parameters {
+  //real<lower=0, upper=1> p;
+  vector[ncov] beta;
+  vector[3] alpha;
+  real<lower=0, upper=1> pi;
+}
+
+model {
+
+  // Priors
+  beta ~ normal(0, 0.5);
+  alpha[1] ~ normal(0, 0.5);
+  alpha[2] ~ normal(0, 0.5);
+  //values range from -40 to 40
+  alpha[3] ~ normal(0, 0.1);
+
+ for(t in 1:TT){
+  if(sexmissing[t] == 1){
+    target += log_mix(pi, 
+                bernoulli_logit_lpmf(y[t] | alpha[1] + 
+                    alpha[3]*bsize +
+                    x[t]*beta), 
+                bernoulli_logit_lpmf(y[t] | alpha[1] + 
+                    alpha[2] + 
+                    alpha[3]*bsize[t] +
+                    x[t]*beta));
+  } else {
+    y[t] ~ bernoulli_logit(alpha[1] +
+                    alpha[2]*sex[t] + 
+                    alpha[3]*bsize[t] +
+                    x[t]*beta);
+  }
+ }
+}
+
diff --git a/StanCourseVLB/logregmodel5.rds b/StanCourseVLB/logregmodel5.rds
diff --git a/StanCourseVLB/logregmodel5.stan b/StanCourseVLB/logregmodel5.stan
@@ -0,0 +1,47 @@
+// The input data is a vector 'y' of length 'TT'.
+data {
+  int<lower=1> TT;
+  int y[TT];
+
+  int ncov;
+  matrix[TT, ncov] x;
+
+  vector[TT] bsize;
+  vector[TT] sex;
+
+  int nsharks;
+  int sharkid[TT];
+}
+
+parameters {
+  //real<lower=0, upper=1> p;
+  vector[ncov] beta[nsharks];
+  vector[3] alpha;
+
+  real mu;
+  real<lower=0> stdev;
+
+}
+
+model {
+
+  // Priors
+  mu ~ normal(0, 0.1);
+  stdev ~ normal(0, 0.1); // by default, this is the half-normal (because stdev > 0)
+
+
+  alpha[1] ~ normal(0, 0.5);
+  alpha[2] ~ normal(0, 0.5);
+  //values range from -40 to 40
+  alpha[3] ~ normal(0, 0.1);
+
+  for(n in 1:nsharks){
+    beta[n] ~ normal(mu, stdev);
+  }
+
+ for(t in 1:TT){
+   y[t] ~ bernoulli_logit(alpha[1] + alpha[2]*sex[t] + alpha[3]*bsize[t] + x[t]*beta[sharkid[t]]);
+ }
+
+}
+
diff --git a/StanCourseVLB/runningstancode_vlb1.Rmd b/StanCourseVLB/runningstancode_vlb1.Rmd
@@ -11,12 +11,14 @@ knitr::opts_chunk$set(echo = TRUE)
 library(rstan)
 library(ggplot2)
 library(tidyverse)
+options(mc.cores = parallel::detectCores())
+rstan_options(auto_write = TRUE)
 
 ```
 
 ## Simulating Data
 
-```{r, simulatedata}
+```{r, simulatedata, warning=FALSE, message=FALSE}
 set.seed(17)
 
 
@@ -46,19 +48,143 @@ for(n in 1:20){
 }
 
 
-sharkfun <- bind_rows(y)
+sharkfun <- dplyr::bind_rows(y)
 
 ```
 
 
 ## Running the Stan models
 
+```{r}
+data1 <- list(TT = dim(sharkfun)[1], 
+              y= sharkfun$Presence)
+fit1 <- stan(file = "logregmodel1.stan", data=data1)
+fit1
+```
+
+
+```{r}
+data2 <- list(TT = dim(sharkfun)[1], 
+              y=sharkfun$Presence, 
+              ncov = 2, 
+              x = cbind(1, sharkfun$timecos, sharkfun$timesin))
 
+fit2 <- stan(file = "logregmodel2.stan", data=data2)
+
+```
 
 
 ```{r}
-data1 <- list(y= sharkfun$Presence)
-fit1 <- stan(file = "logregmodel1.stan", data=data1)
-fit1
+data3 <- list(TT = dim(sharkfun)[1], 
+              y=sharkfun$Presence, 
+              ncov = 2, 
+              x = cbind(sharkfun$timecos, sharkfun$timesin), 
+              bsize = sharkfun$Size - 220, 
+              sex = sharkfun$Sex)
+
+fit3 <- stan(file = "logregmodel3.stan", data=data3)
+
+```
+
+```{r}
+data4 <- list(TT =  dim(sharkfun)[1], 
+              y=sharkfun$Presence, 
+              sexmissing = as.numeric(sharkfun$Shark == "Shark 3"),
+              ncov = 2, 
+              x = cbind(sharkfun$timecos, sharkfun$timesin), 
+              bsize = sharkfun$Size - 220, 
+              sex = sharkfun$Sex)
+
+fit4 <- stan(file = "logregmodel4.stan", data=data4)
+
+```
+
+
+```{r}
+
+data5 <- list(TT =  dim(sharkfun)[1], 
+              y=sharkfun$Presence, 
+              ncov = 2, 
+              x = cbind(sharkfun$timecos, sharkfun$timesin), 
+              bsize = sharkfun$Size - 220, 
+              sex = sharkfun$Sex, 
+              nsharks = 20, 
+              sharkid = rep(1:20, each=365))
+
+fit5 <- stan(file="logregmodel5.stan", data=data5)
+
+
+
+
 ```
 
+
+## projpred
+
+```{r}
+
+library(rstanarm)
+library(projpred)
+library(ggplot2)
+library(bayesplot)
+theme_set(theme_classic())
+
+
+n <- 7300
+D <- 5
+p0 <- 2 # prior guess for the number of relevant variables
+tau0 <- p0/(D-p0) * 1/sqrt(n) 
+# regularized horseshoe prior
+prior_coeff <- hs(global_scale = tau0, slab_scale = 1) 
+
+#----------------------------------------------------------------------
+## FITTING THE MODELS
+
+logregcov.horseshoe <- stan_glm(formula = Presence~., 
+                      family = binomial(link="logit"), 
+                      data = sharkfun%>%dplyr::select(-Shark), 
+                      prior = hs(global_scale = 0.007802743, 
+                                 slab_scale = 1))
+
+logregcov.defaultpriors <- stan_glm(formula = Presence~., 
+                      family = binomial(link="logit"), 
+                      data = sharkfun%>%dplyr::select(-Shark))
+
+logregcov.n01 <- stan_glm(formula = Presence~., 
+                      family = binomial(link="logit"), 
+                      data = sharkfun%>%dplyr::select(-Shark), 
+                      prior = normal(0,1))
+
+
+#----------------------------------------------------------------------
+## K-FOLD CROSS-VALIDATION
+
+cvs.horseshoe <- cv_varsel(logregcov.horseshoe, 
+                           method='forward', 
+                           cv_method='kfold', K=5)
+
+cvplot.horseshoe <- varsel_plot(cvs.horseshoe, 
+                                stats=c('elpd, acc'))  + 
+  theme_minimal() + theme(text=element_text(size=15), 
+                          legend.position = "none") + 
+  ggtitle("Classification Accuracy (5-fold CV)")
+
+cvplot.horseshoe
+
+suggest_size(cvs)
+
+vs <- varsel(logregcov.horseshoe, method='forward')
+vs$vind
+varsel_plot(vs, stats=c('elpd', 'acc'), deltas=F)
+
+#----------
+
+cvs.horseshoe$vind
+
+```
+
+
+
+
+
+
diff --git a/StanCourseVLB/stanhour3.Rmd b/StanCourseVLB/stanhour3.Rmd
@@ -345,7 +345,7 @@ model{
                     x[t]*beta));
   } else {
     y[t] ~ bernoulli_logit(alpha[1] +
-                    alpha[2]*sex[t]
+                    alpha[2]*sex[t] + 
                     alpha[3]*bsize[t] +
                     x[t]*beta);
   }
@@ -385,13 +385,13 @@ Changes: have an index for individual shark and $\beta_1$ is now a hierarchical
 ```{stan, output.var = "ex4", eval=F}
 data{
    ...
-  int no.sharks;
+  int nsharks;
   vector[TT] sharkid;
 }
 
 parameters{
   ...
-  vector[no.sharks] beta;
+  vector[nsharks] beta;
   real betamu;
   real<lower=0> betasig;
 }
@@ -466,7 +466,7 @@ https://mc-stan.org/projpred/articles/quickstart.html
 Back to the fixed effects for a minute: 
 
 ```{r, eval=F}
-n <- 4000
+n <- 7300
 D <- 5
 p0 <- 2 # prior guess for the number of relevant variables
 tau0 <- p0/(D-p0) * 1/sqrt(n) 
@@ -477,7 +477,7 @@ prior_coeff <- hs(global_scale = tau0, slab_scale = 1)
 logregcov.horseshoe <- stan_glm(formula = Presence~., 
                       family = binomial(link="logit"), 
                       data = sharkfun, 
-                      prior = hs(global_scale = 0.03049858, 
+                      prior = hs(global_scale = 0.007802743, 
                                  slab_scale = 1))
 
 logregcov.defaultpriors <- stan_glm(formula = Presence~.,