first commit

hyejung0 · Mar 14, 2023 · 4dd13b7 · 4dd13b7
commit 4dd13b7
Show file tree

Hide file tree

Showing 32 changed files with 1,929 additions and 0 deletions.
diff --git a/boots.lmer.Rcheck/00_pkg_src/boots.lmer/DESCRIPTION b/boots.lmer.Rcheck/00_pkg_src/boots.lmer/DESCRIPTION
@@ -0,0 +1,19 @@
+Package: boots.lmer
+Title: Linear Mixed Effects Models with Bootstrap Samples
+Version: 0.0.0.9000
+Authors@R: 
+    person("Hyejung", "Lee", , "[email protected]", role = c("aut", "cre"),
+           comment = c(ORCID = "0000-0002-5293-4134"))
+Description: boots.lmer function returns a list of linear mixed effects models. The length of the list is the same number as the number of bootstrap samples.  
+License: `use_mit_license()`, `use_gpl3_license()` or friends to pick a
+        license
+Encoding: UTF-8
+Roxygen: list(markdown = TRUE)
+RoxygenNote: 7.2.3
+Suggests: knitr, rmarkdown
+VignetteBuilder: knitr
+Imports: data.table
+NeedsCompilation: no
+Packaged: 2023-03-14 21:35:01 UTC; u1317537
+Author: Hyejung Lee [aut, cre] (<https://orcid.org/0000-0002-5293-4134>)
+Maintainer: Hyejung Lee <[email protected]>
diff --git a/boots.lmer.Rcheck/00_pkg_src/boots.lmer/NAMESPACE b/boots.lmer.Rcheck/00_pkg_src/boots.lmer/NAMESPACE
@@ -0,0 +1,5 @@
+# Generated by roxygen2: do not edit by hand
+
+export(boots.lmer)
+export(boots.samples)
+import(data.table)
diff --git a/boots.lmer.Rcheck/00_pkg_src/boots.lmer/R/boots.lmer.R b/boots.lmer.Rcheck/00_pkg_src/boots.lmer/R/boots.lmer.R
@@ -0,0 +1,56 @@
+
+
+#' Bootstrap Sampled Linear Mixed Effects Models (LMERs)
+#' @return A list of length B, of linear mixed effects model fits.
+#' @param boots.samples.list boots.samples output.
+#' @param y character string. Name of column in each data.table of boots.samples.list, to be used as the outcome in LMER.
+#' @param X a vector of character string.  Names of the of covariates to fit in LMER, as they appear on each dataset of boots.samples.list.
+#' @param use.formula a vector of class formula. This is to be used in lmer fit. If not provided, then a basic additive fixed effects model with random effect of (1|no.repeat.sub.id) will be fit.
+#' @examples
+#' example.subject<-c("Sarah","John","Beth","Anna","Sarah","Sarah","Chris","Blake","John","Anna")
+#' example.dat<-data.frame("Y"=rnorm(n=length(example.subject)),
+#'                        "X"=rpois(n=length(example.subject), lambda = 3),
+#'                        "subjects"=example.subject)
+#' output<-boots.samples(dat=example.dat,sub.id = "subjects",B=4) #create 4 bootstrap samples
+#' lmer.out<-boots.lmer(y="Y", X=c("X1","X2","X3"), boots.samples.list = output)
+#' @export
+boots.lmer<-function(y,X,boots.samples.list,use.formula=NULL){
+library(data.table)
+
+  lmer.fit<-function(y,X,dat,use.formula=NULL){#a function that fits one LMER
+
+
+    if(!is.null(use.formula)){#If we have a specific formula we want to use for lmer, then we use it.
+      dat[,lme4::lmer(use.formula,data=dat, REML=FALSE)]
+    }else{ #if not, then we will fit a basic function with random effect with common intercept, etc.
+      #Make sure we use backtick quotes in case the names have spaces and special characters
+      X.names<-lapply(X, as.name)
+      #If we have more than one covariate, concatenate them with +
+      if(length(X)>1){
+        X.names<-paste(X.names, collapse=" + ")
+        X.formula<-as.formula(paste0("~ ", paste(X.names, collapse=" + ")))
+      }
+      model_formula1<-formula(paste0("`",rlang::quo_name(y),"`", "~", rlang::quo_name(X.names),"+(1|no.repeat.sub.id)"))
+      dat[,lme4::lmer(model_formula1,data=dat, REML=FALSE)]
+    }
+
+  }
+
+
+  #CRAN limits the number of cores available to packages to 2, for performance reasons. There was a thread in the mailing list, I believe, but I can't find it right now.
+  chk <- Sys.getenv("_R_CHECK_LIMIT_CORES_", "")
+
+  if (nzchar(chk) && chk == "TRUE") {
+    # use 2 cores in CRAN/Travis/AppVeyor
+    num_workers <- 2L
+  } else {
+    # use all cores in devtools::test()
+    num_workers <- parallel::detectCores()
+  }
+
+
+  parallel::mclapply(boots.samples.list,function(boots.dat){
+    lmer.fit(y=y,X=X,dat=boots.dat)}
+    ,mc.cores = num_workers)
+
+}
diff --git a/boots.lmer.Rcheck/00_pkg_src/boots.lmer/R/boots.samples.R b/boots.lmer.Rcheck/00_pkg_src/boots.lmer/R/boots.samples.R
@@ -0,0 +1,57 @@
+
+
+#' Bootstrap Samples
+#' @return A list of length B, of matrices with index and subjects
+#' @param dat data frame or data table of our interest
+#' @param sub.id String vector, the column name of the subject ID's.
+#' @param B Natural number. Number of bootstrap samples to create.
+#' @examples
+#' example.subject<-c("Sarah","John","Beth","Anna","Sarah","Sarah","Chris","Blake","John","Anna")
+#' example.dat<-data.frame("Y"=rnorm(n=length(example.subject)),
+#'                         "X1"=rpois(n=length(example.subject), lambda = 3),
+#'                         "X2"=rnorm(n=length(example.subject)),
+#'                         "X3"=rbeta(n=length(example.subject), shape1 = 3, shape2 = 0.5),
+#'                         "subjects"=example.subject)
+#' output<-boots.samples(dat=example.dat,sub.id = "subjects",B=4) #create 4 bootstrap samples
+#' output[[1]] #This is the first one!
+#' @import data.table
+#' @export
+boots.samples<-function(dat, sub.id,B){
+
+  sub.id<-dat[,sub.id]
+  #collect unique subject ID's
+  unique.id<-unique(sub.id)
+
+  #sample unique subject ID, B number of times
+  this.index<-sapply(1:B,function(e){sample(unique.id, size = length(unique.id),replace = TRUE)})
+
+  #we need to take a note of people who are sampled more than once to make sure that they are sampled from the data as if they are different individuals.
+  no.repeat.id<-
+    apply(this.index,2, function(x){
+      temp<-table(x)
+      for(i in seq_along(temp)){
+        if(temp[i]>1){ #if the subject id appears more than once
+          num.appearance<-temp[i] #number of times the subject appears in the sample
+          x[which(x==names(temp)[i])]<-paste0(names(temp)[i],"__",1:num.appearance) #generate new subject ID by concatenating "_#" at the end of the id. Replace the old name with this
+        }
+      }
+      x #return the list of sampled subject ID , now all sample should be unique subject ID.
+    })
+
+
+  output<-
+    apply(no.repeat.id,2,function(x){
+      temp<-
+        sapply(x,function(x_i){ #for each subject
+          index<-which(sub.id==stringr::word(x_i,1,sep = "\\__")) #index of original data where the subject id appears
+          cbind(index, "no.repeat.id"=rep(x_i,length(index)))}) #get all observation for this particular subject.
+
+      #bind the index and no.repeat.id. as rows
+      temp.dat<-do.call(rbind, lapply(temp, data.table::data.table))
+      complete.dat<-data.table::as.data.table(dat[temp.dat$index,])
+      complete.dat[,no.repeat.sub.id:=(temp.dat$no.repeat.id)] # add the new subject ID column, with no repetition
+    }
+    )
+
+  output
+}
diff --git a/boots.lmer.Rcheck/00_pkg_src/boots.lmer/build/vignette.rds b/boots.lmer.Rcheck/00_pkg_src/boots.lmer/build/vignette.rds
diff --git a/boots.lmer.Rcheck/00_pkg_src/boots.lmer/inst/doc/create_bootstrap_samples.R b/boots.lmer.Rcheck/00_pkg_src/boots.lmer/inst/doc/create_bootstrap_samples.R
@@ -0,0 +1,36 @@
+## ---- include = FALSE---------------------------------------------------------
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+
+
+options(rmarkdown.html_vignette.check_title = FALSE)
+
+## ----setup--------------------------------------------------------------------
+library(boots.lmer)
+
+## -----------------------------------------------------------------------------
+set.seed(1020)
+example.subject<-c("Sarah","John","Beth","Anna","Sarah","Sarah","Chris","Blake","John","Anna")
+example.dat<-data.frame("Y"=rnorm(n=length(example.subject)),
+                        "X1"=rpois(n=length(example.subject), lambda = 3),
+                        "X2"=rnorm(n=length(example.subject)),
+                        "X3"=rbeta(n=length(example.subject), shape1 = 3, shape2 = 0.5),
+                        "subjects"=example.subject)
+knitr::kable(example.dat, caption = "Table 1. Example data set.")
+
+## -----------------------------------------------------------------------------
+output<-boots.samples(dat = example.dat, sub.id = "subjects",B=4)
+
+output
+# knitr::kable(output,caption = "Table 2. Four boostrap samples of example.dat data sets. The sampling unit is subject, hence the number of rows of each data set is expected to be different from one another.",row.names = FALSE)
+
+## -----------------------------------------------------------------------------
+
+output[[1]]
+# knitr::kable(output[[1]],caption = "Table 2.A. The first bootstrap sample",row.names = FALSE)
+
+## -----------------------------------------------------------------------------
+# example.dat[output[[1]]$index,]
+
diff --git a/boots.lmer.Rcheck/00_pkg_src/boots.lmer/inst/doc/create_bootstrap_samples.Rmd b/boots.lmer.Rcheck/00_pkg_src/boots.lmer/inst/doc/create_bootstrap_samples.Rmd
@@ -0,0 +1,86 @@
+---
+title: "create_bootstrap_samples"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{create_bootstrap_samples}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+
+
+options(rmarkdown.html_vignette.check_title = FALSE)
+```
+
+
+```{r setup }
+library(boots.lmer)
+```
+
+
+This vignette contains a walk-through of `boots.lmer::boots.samples` function.
+
+
+The `boots.samples` function produces a list of bootstrap samples, made especially for longitudinal data. Longitudinal data formatted where multiple observations should be chosen cautiously when sampling for boostrap:
+
+1. sampling unit should be subject. That is, if one subject is chosen in the boostrap sample, the subject's entire observations should be included. 
+2. the returning subject's name should be different if they were selected more than once, to use for random effects. 
+
+Here's the example. The below example can be found in `help(boots.samples)` page as well.
+
+<br>
+<br>
+
+# Generate example data
+
+Here's an example data:
+```{r}
+set.seed(1020)
+example.subject<-c("Sarah","John","Beth","Anna","Sarah","Sarah","Chris","Blake","John","Anna")
+example.dat<-data.frame("Y"=rnorm(n=length(example.subject)),
+                        "X1"=rpois(n=length(example.subject), lambda = 3),
+                        "X2"=rnorm(n=length(example.subject)),
+                        "X3"=rbeta(n=length(example.subject), shape1 = 3, shape2 = 0.5),
+                        "subjects"=example.subject)
+knitr::kable(example.dat, caption = "Table 1. Example data set.")
+```
+
+The example dataset contains total of 6 subjects (Sarah, John, Beth, Anna, Chris, and Blck), with some repeating observations from few subjects.
+
+
+
+# Creating bootstrap samples using a longitudinal data
+
+
+Here's `boots.samples` output, if we use `example.dat$subjects` as our subject ID to create 4 bootstrap samples.:
+
+
+```{r}
+output<-boots.samples(dat = example.dat, sub.id = "subjects",B=4)
+
+output
+# knitr::kable(output,caption = "Table 2. Four boostrap samples of example.dat data sets. The sampling unit is subject, hence the number of rows of each data set is expected to be different from one another.",row.names = FALSE)
+```
+
+The output is a list of 4, each list being a data frame. Each data frame contains two columns: **index** and **no.repeat.id.** We will explain each column using the first bootstrap sample:
+
+```{r}
+
+output[[1]]
+# knitr::kable(output[[1]],caption = "Table 2.A. The first bootstrap sample",row.names = FALSE)
+```
+
+In the first sample, total 6 subjects are chosen, but the number of rows is different from the original data set. The original data set has 10 rows, but the first bootstrap sample has 11 rows. This is because again, our sample of unit is subject, where there are different number of observations for each subject.   
+
+The **index** column shows the row number of the original data set for each row of the bootstrap sample. For example, the first two rows belong to John, with index 2 and 9. We can get the bootstrap data using these index. 
+
+```{r}
+# example.dat[output[[1]]$index,]
+```
+
+The **index** can be used to select the right observation from the original data set. However, if we are to use the subjects as a random effect in our linear mixed effects model, the subjects has to be distinguished each time it is chosen. For example, in our first bootstrap sample, John is chosen 3 times. If we use `John` as is in the random effects, R will not be able to distinguish these 3 different `John`s. Therefore, we need to name the 3 `John`s differently. If we look at **no.repeat.id** column in Table 2.A, the different names are assigned with `__#` format, where `#` is the number of times the subject was sampled. Hence, we see that we have `John__1`, `John__2`, and `John__3` to take into account `John` as 3 different subjects.