From bf2754262650405921ccc79d15c7b16f6e4981f8 Mon Sep 17 00:00:00 2001
From: Felicia <fishkek@LAPTOP-MPPBQASA.localdomain>
Date: Fri, 4 Jun 2021 15:11:28 +0800
Subject: [PATCH] edited the code in effsize file(effsize boot function) to
 account for paired and unpaired cases

---
 R/effectsize.R | 1376 ++++++++++++++++++++++++------------------------
 1 file changed, 694 insertions(+), 682 deletions(-)

diff --git a/R/effectsize.R b/R/effectsize.R
index 75cdcdf..d9ae41b 100755
--- a/R/effectsize.R
+++ b/R/effectsize.R
@@ -1,682 +1,694 @@
-#' Compute Effect Size(s)
-#'
-#' For each pair of observations in a \code{dabest} object, a desired effect
-#' size can be computed. Currently there are five effect sizes available:
-#' \itemize{ \item The \strong{mean difference}, given by \code{mean_diff()}.
-#' \item The \strong{median difference}, given by \code{median_diff()}. \item
-#' \strong{Cohen's \emph{d}}, given by \code{cohens_d()}. \item \strong{Hedges'
-#' \emph{g}}, given by \code{hedges_g()}. \item \strong{Cliff's delta}, given by
-#' \code{cliffs_delta()}. }
-#'
-#'
-#' @param x A \code{dabest} object, generated by the \link[=dabest]{dabest()}
-#'   function.
-#'
-#' @param ci float, default 95. The level of the confidence intervals produced.
-#'   The default \code{ci = 95} produces 95\% CIs.
-#'
-#' @param reps integer, default 5000. The number of bootstrap resamples that
-#'   will be generated.
-#'
-#' @param seed integer, default 12345. This specifies the seed used to set the
-#'   random number generator. Setting a seed ensures that the bootstrap
-#'   confidence intervals for the same data will remain stable over separate
-#'   runs/calls of this function. See \link{set.seed} for more details.
-#'
-#'
-#'
-#' @return A \code{dabest_effsize} object with 10 elements.
-#'
-#'   \describe{
-#'
-#'   \item{\code{data}}{ The dataset passed to \link[=dabest]{dabest()}, as a
-#'   \code{\link[tibble]{tibble}}. }
-#'
-#'   \item{\code{x} and \code{y}}{ The columns in \code{data} used to plot the x
-#'   and y axes, respectively, as supplied to \link[=dabest]{dabest()}. These are
-#'   \href{https://adv-r.hadley.nz/quasiquotation.html}{quoted variables} for
-#'   \href{https://tidyeval.tidyverse.org/}{tidy evaluation} during the
-#'   computation of effect sizes. }
-#'
-#'   \item{\code{idx}}{ The vector of control-test groupings as initially passed
-#'   to \link[=dabest]{dabest()}. }
-#'
-#'   \item{\code{is.paired}}{ Whether or not the experiment consists of paired
-#'   (aka repeated) observations. Originally supplied to \link[=dabest]{dabest()}. }
-#'
-#'   \item{\code{id.column}}{ If \code{is.paired} is \code{TRUE}, the column in
-#'   \code{data} that indicates the pairing of observations. As passed to
-#'   \link[=dabest]{dabest()}. }
-#'
-#'   \item{\code{effect.size}}{ The effect size being computed. One of the
-#'   following: \code{c("mean_diff", "median_diff", "cohens_d", "hedges_g",
-#'   "cliffs_delta")}. }
-#'
-#'   \item{\code{.data.name}}{ The variable name of the dataset passed to
-#'   \link[=dabest]{dabest()}. }
-#'
-#'   \item{\code{summary}}{ A \link{tibble} with a row for the mean or median of
-#'   each group in the \code{x} column of \code{data}, as indicated in
-#'   \code{idx}. }
-#'
-#'   \item{\code{result}}{
-#'
-#'   A \link{tibble} with the following 15 columns:
-#'
-#'   \describe{ \item{control_group, test_group}{ The name of the control group
-#'   and test group respectively.}
-#'
-#'   \item{control_size, test_size}{ The number of observations in the control
-#'   group and test group respectively. }
-#'
-#'   \item{effect_size}{ The effect size used. }
-#'
-#'   \item{paired}{ Is the difference paired (\code{TRUE}) or not
-#'   (\code{FALSE})? }
-#'
-#'   \item{difference}{ The effect size of the difference between the two
-#'   groups. }
-#'
-#'   \item{variable}{ The variable whose difference is being computed, ie. the
-#'   column supplied to \code{y}. }
-#'
-#'   \item{ci}{ The \code{ci} passed to this function. }
-#'
-#'   \item{bca_ci_low, bca_ci_high}{ The lower and upper limits of the Bias
-#'   Corrected and Accelerated bootstrap confidence interval. }
-#'
-#'   \item{pct_ci_low, pct_ci_high}{ The lower and upper limits of the
-#'   percentile bootstrap confidence interval. }
-#'
-#'   \item{bootstraps}{ The vector of bootstrap resamples generated. } } }
-#'
-#'   }
-#'
-#'
-#' @seealso \itemize{
-#'
-#'   \item \link[=dabest]{Loading data} for effect size computation.
-#'
-#'   \item \link[=plot.dabest_effsize]{Generating estimation plots} after effect
-#'   size computation.
-#'
-#'   \item The
-#'   \href{http://www.estimationstats.com/#/about-effect-sizes}{mathematical
-#'   definitions} and equations used to compute each effect size.
-#'
-#'   \item The \code{ \link[effsize:effsize-package]{effsize} } package, which
-#'   is used under the hood to compute Cohen's \emph{d}, Hedges' \emph{g}, and
-#'   Cliff's delta.
-#'
-#'   \item The \code{ \link[boot]{boot}() } and \code{ \link[boot]{boot.ci}() }
-#'   functions from the \code{boot} package, which generate the (nonparametric)
-#'   bootstrapped resamples used to compute the confidence intervals.
-#'
-#'   }
-#'
-#'
-#' @examples
-#' # Loading data for unpaired (two independent groups) analysis.
-#' petal_widths <- dabest(iris, Species, Petal.Width,
-#'                        idx = c("setosa", "versicolor"),
-#'                        paired = FALSE)
-#'
-#'
-#' # Compute the mean difference.
-#' mean_diff(petal_widths)
-#'
-#' # Plotting the mean differences.
-#' mean_diff(petal_widths) %>% plot()
-#'
-#' @export
-mean_diff    <- function(x, ci = 95, reps = 5000 , seed = 12345) UseMethod("mean_diff", x)
-
-#' @export
-mean_diff.dabest <- function(x, ci = 95, reps = 5000 , seed = 12345) {
-  effect_size(x, ci = ci, reps = reps, seed = seed, effect.size = "mean_diff")
-}
-
-
-#' @rdname mean_diff
-#' @export
-median_diff  <- function(x, ci = 95, reps = 5000 , seed = 12345) UseMethod("median_diff", x)
-
-#' @export
-median_diff.dabest <- function(x, ci = 95, reps = 5000 , seed = 12345) {
-  effect_size(x, ci = ci, reps = reps, seed = seed, effect.size = "median_diff")
-}
-
-
-#' @rdname mean_diff
-#' @export
-cohens_d     <- function(x, ci = 95, reps = 5000 , seed = 12345) UseMethod("cohens_d", x)
-
-#' @export
-cohens_d.dabest <- function(x, ci = 95, reps = 5000 , seed = 12345) {
-  effect_size(x, ci = ci, reps = reps, seed = seed, effect.size = "cohens_d")
-}
-
-
-
-#' @rdname mean_diff
-#' @export
-hedges_g     <- function(x, ci = 95, reps = 5000 , seed = 12345) UseMethod("hedges_g", x)
-
-#' @export
-hedges_g.dabest <- function(x, ci = 95, reps = 5000 , seed = 12345) {
-  effect_size(x, ci = ci, reps = reps, seed = seed, effect.size = "hedges_g")
-}
-
-
-
-#' @rdname mean_diff
-#' @export
-cliffs_delta <- function(x, ci = 95, reps = 5000 , seed = 12345) UseMethod("cliffs_delta", x)
-
-#' @export
-cliffs_delta.dabest <- function(x, ci = 95, reps = 5000 , seed = 12345) {
-  effect_size(x, ci = ci, reps = reps, seed = seed, effect.size = "cliffs_delta")
-}
-
-
-#### Helper functions that actually compute the effect sizes. ####
-mean_diff_ <- function(control, treatment, paired) {
-  if (identical(paired, FALSE)) return(mean(treatment) - mean(control))
-  else return(mean(treatment - control))
-}
-
-
-
-median_diff_ <- function(control, treatment, paired) {
-  if (identical(paired, FALSE)) return(median(treatment) - median(control))
-  else return(median(treatment - control))
-}
-
-
-
-cohens_d_ <- function(control, treatment, paired) {
-  return(effsize::cohen.d(treatment, control, paired=paired)$estimate)
-}
-
-
-
-hedges_g_ <- function(control, treatment, paired) {
-  cd <- cohens_d_(treatment, control, paired=paired)
-  # not sure why I have to negate this....
-  corr.factor <- -hedges_correction(treatment, control)
-  return(cd * corr.factor)
-}
-
-
-
-cliffs_delta_ <- function(control, treatment, paired=NA) {
-  return(effsize::cliff.delta(treatment, control)$estimate)
-}
-
-
-#' Returns the exact Hedges' correction factor for Cohen's d.
-#' @param x1 A vector of values.
-#'
-#' @param x2 Another vector of values.
-#'
-#' @return Hedges' correction for the g of x1 and x2.
-#'
-hedges_correction <- function(x1, x2) {
-
-  n1 <- length(x1)
-  n2 <- length(x2)
-
-  deg.freedom <- n1 + n2 - 2
-  numer       <- gamma(deg.freedom/2)
-  denom0      <- gamma((deg.freedom - 1) / 2)
-  denom       <- sqrt((deg.freedom / 2)) * denom0
-
-  if (is.infinite(numer) | is.infinite(denom)) {
-    # Occurs when df is too large.
-    # Applies Hedges and Olkin's approximation.
-    df.sum <- n1 + n2
-    denom <- (4 * df.sum) - 9
-    out <- 1 - (3 / denom)
-  } else out <- numer / denom
-
-  return(out)
-}
-
-
-
-#' Compute the standardizers for Cohen's d
-#' @param x1 A vector of values.
-#'
-#' @param x2 Another vector of values.
-#'
-#' @return Named list for pooled and average standardizers.
-#' Use the pooled one for unpaired Cohens' d, and the average one
-#' for paired Cohens'd d.
-#'
-cohen_d_standardizers <- function(x1, x2) {
-  control_n    <- length(x1)
-  test_n       <- length(x2)
-
-  control_mean <- mean(x1)
-  test_mean    <- mean(x2)
-
-  control_var  <- var(x1) # by default, use N-1 to compute the variance.
-  test_var     <- var(x2)
-
-  control_std  <- sd(x1)
-  test_std     <- sd(x2)
-
-  # For unpaired 2-groups standardized mean difference.
-  pooled <- sqrt(
-    ((control_n - 1) * control_var + (test_n - 1) * test_var) /
-      (control_n + test_n - 2)
-  )
-
-  # For paired standardized mean difference.
-  average <- sqrt((control_var + test_var) / 2)
-
-  # out <- c(pooled, average)
-  # names(out) <- c("pooled", "average")
-
-  return(list(pooled = pooled, average = average))
-
-}
-
-
-
-
-effsize_boot <- function(data, effsize_func, R = 5000, paired = FALSE) {
-
-  func <- match.fun(effsize_func)
-
-  # Create stata for resampling.
-  s <- c(rep(1, length(data$control)),
-         rep(2, length(data$test)))
-
-
-  bootboot <- function(d, indicies, paired) {
-    c <- d[indicies[s == 1]]
-    t <- d[indicies[s == 2]]
-
-    return(func(c, t, paired))
-  }
-
-  b <- boot(
-    c(data$control, data$test),
-    statistic = bootboot,
-    R = R,
-    strata = s,
-    paired = paired)
-
-  return(b)
-
-}
-
-
-# effsize_boot <- function(effsize, data, paired, indices) {
-#   control <- data$control[indices]
-#   test <- data$test[indices]
-#
-#   cd <- effsize(control, test, paired)
-#
-#   return(cd)
-# }
-
-
-
-#' @importFrom boot boot boot.ci
-#' @importFrom dplyr arrange bind_rows filter group_by summarize
-#' @importFrom magrittr %>%
-#' @importFrom rlang quo_name
-#' @importFrom stringr str_interp
-#' @importFrom tibble tibble
-effect_size <- function(.data, ..., effect.size, ci, reps, seed) {
-
-  #### Check object class ####
-  if (class(.data)[1] != "dabest") {
-    stop(paste(
-      "The object you are plotting is not a `dabest` class object. ",
-      "Please check again! ")
-    )
-  }
-
-  # Create handles for easy access to the items in `.data`.
-  raw.data            <-  .data$data
-  idx                 <-  .data$idx
-  all.groups          <-  .data$.all.groups
-  paired              <-  .data$is.paired
-  data.name           <-  .data$.data.name
-  is.paired           <-  .data$is.paired
-
-  plot.groups.sizes   <-  unlist(lapply(idx, length))
-
-  # The variables below should are quosures!
-  x_enquo             <-  .data$x
-  y_enquo             <-  .data$y
-  effect.size_enquo   <-  quo_name(effect.size)
-
-  x_quoname           <-  quo_name(x_enquo)
-  y_quoname           <-  quo_name(y_enquo)
-
-  # effect.size_enquo   <-  rlang::enquo(effect.size)
-  # effect.size_quoname <-  rlang::quo_name(effect.size_enquo)
-
-  id.col_enquo        <-  .data$id.column
-
-  # Parse the effect.size.
-  if (effect.size == "mean_diff") {
-    es = mean_diff_
-    summ_func = mean
-    summ_name = "mean"
-  } else if (effect.size == "median_diff") {
-    es = median_diff_
-    summ_func = median
-    summ_name = "median"
-  } else if (effect.size == "cohens_d") {
-    es = cohens_d_
-    summ_func = mean
-    summ_name = "mean"
-  } else if (effect.size == "hedges_g") {
-    es = hedges_g_
-    summ_func = mean
-    summ_name = "mean"
-  } else if (effect.size == "cliffs_delta") {
-    es = cliffs_delta_
-    summ_func = median
-    summ_name = "median"
-  }
-
-
-
-  #### Loop through each comparison group. ####
-  result <- tibble() # To capture output.
-
-  for (group in idx) {
-
-    # Check the control group (`group[1]`) is in the x-column.
-    if (identical(group[1] %in% raw.data[[x_quoname]], FALSE)) {
-
-      err1 <- str_interp("${group[1]} is not found")
-      err2 <- str_interp("in the ${x_quoname} column.")
-
-      stop(paste(err1, err2))
-    }
-
-    # Patch in v0.2.2.
-    # Note how we have to unquote both the x_enquo, and the group name!
-    ctrl <- raw.data %>% filter(!!x_enquo == !!group[1])
-
-    ctrl <- ctrl[[y_quoname]]
-
-    c <- na.omit(ctrl)
-
-    # If ctrl is length 0, stop!
-    if (length(c) == 0) {
-      stop(
-        str_interp(
-          c("There are zero numeric observations in the group ${group[1]}.")
-        )
-      )
-    }
-
-    # Get test groups (everything else in group), loop through them and compute
-    # the difference between group[1] and each group.
-    # Test groups are the 2nd element of group onwards.
-    test_groups <- group[2: length(group)]
-
-    for (grp in test_groups) {
-
-      # Check if the current group is in the x-column.
-      if (identical(grp %in% raw.data[[x_quoname]], FALSE)) {
-        stop(
-          str_interp(
-            "${grp} is not found in the ${x_quoname} column."
-          )
-        )
-      }
-
-      # Patch in v0.2.2.
-      # Note how we have to unquote both x_enquo, and grp!
-      test <- raw.data %>% filter(!!x_enquo == !!grp)
-      test <- test[[y_quoname]]
-      t <- na.omit(test)
-
-      # If current test group is length 0, stop!
-      if (length(t) == 0) {
-        stop(
-          str_interp(
-            c("There are zero numeric observations in the group ${grp}.")
-          )
-        )
-      }
-
-
-
-      #### Compute bootstrap. ####
-      datalist <- list(control = ctrl, test = test)
-
-      set.seed(seed)
-      boot_result <- effsize_boot(datalist, effsize_func = es,
-                                  R = reps, paired = paired)
-
-      set.seed(NULL)
-
-      # if (identical(paired, FALSE)) {
-      #   diff <- func(t) - func(c)
-      #   # For two.boot, note that the first vector is the test vector.
-      #   boot <- simpleboot::two.boot(t, c, FUN = func, R = reps)
-      #
-      # } else {
-      #   if (length(c) != length(t)) {
-      #     stop("The two groups are not the same size, but paired = TRUE.")
-      #   }
-      #   paired_diff <- t - c
-      #   diff <- func(paired_diff)
-      #   boot <- simpleboot::one.boot(paired_diff, FUN = func, R = reps)
-      # }
-
-
-
-      #### Compute confidence interval. ####
-      # check CI.
-      if (ci < 0 | ci > 100) {
-        err_string <- str_interp(
-          "`ci` must be between 0 and 100, not ${ci}"
-        )
-        stop(err_string)
-      }
-
-      bootci <- boot.ci(boot_result, conf = ci/100, type = c("perc", "bca"))
-
-
-      #### Save pairwise result. ####
-      row <- tibble(
-        # Convert the name of `func` to a string.
-        control_group = group[1],
-        test_group = grp,
-        control_size = length(c),
-        test_size = length(t),
-        effect_size = effect.size,
-        paired = paired,
-        variable = y_quoname,
-        difference = boot_result$t0,
-        ci = ci,
-        bca_ci_low = bootci$bca[4],
-        bca_ci_high = bootci$bca[5],
-        pct_ci_low = bootci$percent[4],
-        pct_ci_high = bootci$percent[5],
-        bootstraps = list(as.vector(boot_result$t)),
-        nboots = length(boot_result$t)
-      )
-      result <- bind_rows(result, row)
-    }
-  }
-
-  # Reset seed.
-  set.seed(NULL)
-
-
-
-  #### Compute summaries. ####
-  summaries <-
-    raw.data %>%
-    filter(!!x_enquo %in% all.groups) %>%
-    group_by(!!x_enquo) %>%
-    summarize(func_quoname = summ_func(!!y_enquo))
-
-  colnames(summaries) <- c(x_quoname, summ_name)
-
-  # Order the summaries by the idx.
-  summaries[[x_quoname]] <-
-    summaries[[x_quoname]] %>%
-    factor(all.groups, ordered = TRUE)
-
-  summaries <- summaries %>% arrange(!!x_enquo)
-
-
-
-  #### Assemble only the data used to create the plot. ####
-  data.out <- raw.data
-
-  # New in v0.2.1 patch.
-  # Basically, the `ellipsis` package has been updated,
-  # and now forcats::as_factor() should only take the object to coerce.
-  data.out[[x_quoname]] <- forcats::as_factor(data.out[[x_quoname]])
-
-  data.out <- filter(data.out, !!x_enquo %in% all.groups)
-
-
-
-  #### Collate output. ####
-  out = list(
-    data = data.out,
-    x = x_enquo,
-    y = y_enquo,
-    idx = idx,
-    id.column = id.col_enquo,
-    is.paired = is.paired,
-    effect.size = effect.size,
-    .data.name = data.name,
-    result = result,
-    summary = summaries
-  )
-
-
-
-  #### Append the custom class `dabest_effsize`. ####
-  class(out) <- c("dabest_effsize", "list")
-
-
-
-  #### Return the output. ####
-  return(out)
-}
-
-
-
-#' Print a `dabest_effsize` object
-#'
-#' @param x A \code{dabest_effsize} object, generated by one of the
-#'   \link[=mean_diff]{effect size computation} functions.
-#'
-#' @param ... S3 signature for generic plot function.
-#'
-#' @param signif_digits Integer, default 3. All numeric figures in the printed
-#'   output will be rounded off to this number of significant digits.
-#'
-#'
-#' @return A summary of the effect sizes and respective confidence intervals.
-#'
-#' @examples
-#' # Performing unpaired (two independent groups) analysis.
-#' unpaired_mean_diff <- dabest(iris, Species, Petal.Width,
-#'                              idx = c("setosa", "versicolor"),
-#'                              paired = FALSE) %>%
-#'                       mean_diff()
-#'
-#' # Display the results in a user-friendly format.
-#' print(unpaired_mean_diff)
-#'
-#' @export
-#' @importFrom rlang quo_name
-#' @importFrom stringr str_interp
-print.dabest_effsize <- function(x, ..., signif_digits = 3) {
-
-  #### Check object class ####
-  if (class(x)[1] != "dabest_effsize") {
-    stop(paste(
-      "The object you are plotting is not a `dabest_effsize` class object. ",
-      "Please check again! "))
-  } else {
-    dabest.effsize <- x
-  }
-
-  #### Get results table and y var. ####
-  tbl <- dabest.effsize$result
-  var <- quo_name(dabest.effsize$y)
-
-  #### Print greeting header. ####
-  print_greeting_header()
-  # dabest_ver <- utils::packageVersion("dabestr")
-  # header     <- stringr::str_interp(
-  #   "DABEST (Data Analysis with Bootstrap Estimation) v${dabest_ver}\n")
-  # cat(header)
-  #
-  # cat(rep('=', nchar(header) - 1), sep='')
-  # cat("\n\n")
-
-  # cat(stringr::str_interp("Variable: ${var} \n\n"))
-
-  #### Print dataset name, xvar, and yvar. ####
-  xvar = quo_name(dabest.effsize$x)
-  yvar = quo_name(dabest.effsize$y)
-
-  cat(str_interp("Dataset    :  ${dabest.effsize$.data.name}\n"))
-  cat(str_interp("X Variable :  ${xvar}\n"))
-  cat(str_interp("Y Variable :  ${yvar}\n\n"))
-
-  #### Print each row. ####
-  cat(apply(tbl, 1, printrow_, sigdig = signif_digits),
-      sep = "\n")
-
-  #### Endnote about BCa. ####
-  cat(str_interp("${tbl$nboots[1]} bootstrap resamples.\n"))
-  cat("All confidence intervals are bias-corrected and accelerated.\n\n")
-
-}
-
-
-
-#' @importFrom stringr str_interp
-printrow_ <- function(my.row, sigdig = 3) {
-  if (identical(my.row$paired, TRUE)) p <- "Paired" else p <- "Unpaired"
-
-  # For labelling the y-axis.
-  effsizer <- list("mean difference", "median difference",
-                   "Cohen's d", "Hedges' g", "Cliff's delta")
-  names(effsizer) <- c("mean_diff", "median_diff",
-                       "cohens_d", "hedges_g", "cliffs_delta")
-
-  line1 <- str_interp(
-    c(
-      "${p} ${effsizer[[my.row$effect_size]]} of ",
-      "${my.row$test_group} ",
-      "(n = ${my.row$test_size}) ",
-      "minus ${my.row$control_group} ",
-      "(n = ${my.row$control_size})\n"
-    )
-  )
-
-
-  line2 <- str_interp(
-    c("${signif(my.row$difference, sigdig)} ",
-      "[${signif(my.row$ci, sigdig)}CI  ",
-      "${signif(my.row$bca_ci_low, sigdig)}; ",
-      "${signif(my.row$bca_ci_high, sigdig)}]\n\n")
-  )
-
-  cat(line1, line2)
-}
-
-
-
+#' Compute Effect Size(s)
+#'
+#' For each pair of observations in a \code{dabest} object, a desired effect
+#' size can be computed. Currently there are five effect sizes available:
+#' \itemize{ \item The \strong{mean difference}, given by \code{mean_diff()}.
+#' \item The \strong{median difference}, given by \code{median_diff()}. \item
+#' \strong{Cohen's \emph{d}}, given by \code{cohens_d()}. \item \strong{Hedges'
+#' \emph{g}}, given by \code{hedges_g()}. \item \strong{Cliff's delta}, given by
+#' \code{cliffs_delta()}. }
+#'
+#'
+#' @param x A \code{dabest} object, generated by the \link[=dabest]{dabest()}
+#'   function.
+#'
+#' @param ci float, default 95. The level of the confidence intervals produced.
+#'   The default \code{ci = 95} produces 95\% CIs.
+#'
+#' @param reps integer, default 5000. The number of bootstrap resamples that
+#'   will be generated.
+#'
+#' @param seed integer, default 12345. This specifies the seed used to set the
+#'   random number generator. Setting a seed ensures that the bootstrap
+#'   confidence intervals for the same data will remain stable over separate
+#'   runs/calls of this function. See \link{set.seed} for more details.
+#'
+#'
+#'
+#' @return A \code{dabest_effsize} object with 10 elements.
+#'
+#'   \describe{
+#'
+#'   \item{\code{data}}{ The dataset passed to \link[=dabest]{dabest()}, as a
+#'   \code{\link[tibble]{tibble}}. }
+#'
+#'   \item{\code{x} and \code{y}}{ The columns in \code{data} used to plot the x
+#'   and y axes, respectively, as supplied to \link[=dabest]{dabest()}. These are
+#'   \href{https://adv-r.hadley.nz/quasiquotation.html}{quoted variables} for
+#'   \href{https://tidyeval.tidyverse.org/}{tidy evaluation} during the
+#'   computation of effect sizes. }
+#'
+#'   \item{\code{idx}}{ The vector of control-test groupings as initially passed
+#'   to \link[=dabest]{dabest()}. }
+#'
+#'   \item{\code{is.paired}}{ Whether or not the experiment consists of paired
+#'   (aka repeated) observations. Originally supplied to \link[=dabest]{dabest()}. }
+#'
+#'   \item{\code{id.column}}{ If \code{is.paired} is \code{TRUE}, the column in
+#'   \code{data} that indicates the pairing of observations. As passed to
+#'   \link[=dabest]{dabest()}. }
+#'
+#'   \item{\code{effect.size}}{ The effect size being computed. One of the
+#'   following: \code{c("mean_diff", "median_diff", "cohens_d", "hedges_g",
+#'   "cliffs_delta")}. }
+#'
+#'   \item{\code{.data.name}}{ The variable name of the dataset passed to
+#'   \link[=dabest]{dabest()}. }
+#'
+#'   \item{\code{summary}}{ A \link{tibble} with a row for the mean or median of
+#'   each group in the \code{x} column of \code{data}, as indicated in
+#'   \code{idx}. }
+#'
+#'   \item{\code{result}}{
+#'
+#'   A \link{tibble} with the following 15 columns:
+#'
+#'   \describe{ \item{control_group, test_group}{ The name of the control group
+#'   and test group respectively.}
+#'
+#'   \item{control_size, test_size}{ The number of observations in the control
+#'   group and test group respectively. }
+#'
+#'   \item{effect_size}{ The effect size used. }
+#'
+#'   \item{paired}{ Is the difference paired (\code{TRUE}) or not
+#'   (\code{FALSE})? }
+#'
+#'   \item{difference}{ The effect size of the difference between the two
+#'   groups. }
+#'
+#'   \item{variable}{ The variable whose difference is being computed, ie. the
+#'   column supplied to \code{y}. }
+#'
+#'   \item{ci}{ The \code{ci} passed to this function. }
+#'
+#'   \item{bca_ci_low, bca_ci_high}{ The lower and upper limits of the Bias
+#'   Corrected and Accelerated bootstrap confidence interval. }
+#'
+#'   \item{pct_ci_low, pct_ci_high}{ The lower and upper limits of the
+#'   percentile bootstrap confidence interval. }
+#'
+#'   \item{bootstraps}{ The vector of bootstrap resamples generated. } } }
+#'
+#'   }
+#'
+#'
+#' @seealso \itemize{
+#'
+#'   \item \link[=dabest]{Loading data} for effect size computation.
+#'
+#'   \item \link[=plot.dabest_effsize]{Generating estimation plots} after effect
+#'   size computation.
+#'
+#'   \item The
+#'   \href{http://www.estimationstats.com/#/about-effect-sizes}{mathematical
+#'   definitions} and equations used to compute each effect size.
+#'
+#'   \item The \code{ \link[effsize:effsize-package]{effsize} } package, which
+#'   is used under the hood to compute Cohen's \emph{d}, Hedges' \emph{g}, and
+#'   Cliff's delta.
+#'
+#'   \item The \code{ \link[boot]{boot}() } and \code{ \link[boot]{boot.ci}() }
+#'   functions from the \code{boot} package, which generate the (nonparametric)
+#'   bootstrapped resamples used to compute the confidence intervals.
+#'
+#'   }
+#'
+#'
+#' @examples
+#' # Loading data for unpaired (two independent groups) analysis.
+#' petal_widths <- dabest(iris, Species, Petal.Width,
+#'                        idx = c("setosa", "versicolor"),
+#'                        paired = FALSE)
+#'
+#'
+#' # Compute the mean difference.
+#' mean_diff(petal_widths)
+#'
+#' # Plotting the mean differences.
+#' mean_diff(petal_widths) %>% plot()
+#'
+#' @export
+mean_diff    <- function(x, ci = 95, reps = 5000 , seed = 12345) UseMethod("mean_diff", x)
+
+#' @export
+mean_diff.dabest <- function(x, ci = 95, reps = 5000 , seed = 12345) {
+  effect_size(x, ci = ci, reps = reps, seed = seed, effect.size = "mean_diff")
+}
+
+
+#' @rdname mean_diff
+#' @export
+median_diff  <- function(x, ci = 95, reps = 5000 , seed = 12345) UseMethod("median_diff", x)
+
+#' @export
+median_diff.dabest <- function(x, ci = 95, reps = 5000 , seed = 12345) {
+  effect_size(x, ci = ci, reps = reps, seed = seed, effect.size = "median_diff")
+}
+
+
+#' @rdname mean_diff
+#' @export
+cohens_d     <- function(x, ci = 95, reps = 5000 , seed = 12345) UseMethod("cohens_d", x)
+
+#' @export
+cohens_d.dabest <- function(x, ci = 95, reps = 5000 , seed = 12345) {
+  effect_size(x, ci = ci, reps = reps, seed = seed, effect.size = "cohens_d")
+}
+
+
+
+#' @rdname mean_diff
+#' @export
+hedges_g     <- function(x, ci = 95, reps = 5000 , seed = 12345) UseMethod("hedges_g", x)
+
+#' @export
+hedges_g.dabest <- function(x, ci = 95, reps = 5000 , seed = 12345) {
+  effect_size(x, ci = ci, reps = reps, seed = seed, effect.size = "hedges_g")
+}
+
+
+
+#' @rdname mean_diff
+#' @export
+cliffs_delta <- function(x, ci = 95, reps = 5000 , seed = 12345) UseMethod("cliffs_delta", x)
+
+#' @export
+cliffs_delta.dabest <- function(x, ci = 95, reps = 5000 , seed = 12345) {
+  effect_size(x, ci = ci, reps = reps, seed = seed, effect.size = "cliffs_delta")
+}
+
+
+#### Helper functions that actually compute the effect sizes. ####
+mean_diff_ <- function(control, treatment, paired) {
+  if (identical(paired, FALSE)) return(mean(treatment) - mean(control))
+  else return(mean(treatment - control))
+}
+
+
+
+median_diff_ <- function(control, treatment, paired) {
+  if (identical(paired, FALSE)) return(median(treatment) - median(control))
+  else return(median(treatment - control))
+}
+
+
+
+cohens_d_ <- function(control, treatment, paired) {
+  return(effsize::cohen.d(treatment, control, paired=paired)$estimate)
+}
+
+
+
+hedges_g_ <- function(control, treatment, paired) {
+  cd <- cohens_d_(treatment, control, paired=paired)
+  # not sure why I have to negate this....
+  corr.factor <- -hedges_correction(treatment, control)
+  return(cd * corr.factor)
+}
+
+
+
+cliffs_delta_ <- function(control, treatment, paired=NA) {
+  return(effsize::cliff.delta(treatment, control)$estimate)
+}
+
+
+#' Returns the exact Hedges' correction factor for Cohen's d.
+#' @param x1 A vector of values.
+#'
+#' @param x2 Another vector of values.
+#'
+#' @return Hedges' correction for the g of x1 and x2.
+#'
+hedges_correction <- function(x1, x2) {
+
+  n1 <- length(x1)
+  n2 <- length(x2)
+
+  deg.freedom <- n1 + n2 - 2
+  numer       <- gamma(deg.freedom/2)
+  denom0      <- gamma((deg.freedom - 1) / 2)
+  denom       <- sqrt((deg.freedom / 2)) * denom0
+
+  if (is.infinite(numer) | is.infinite(denom)) {
+    # Occurs when df is too large.
+    # Applies Hedges and Olkin's approximation.
+    df.sum <- n1 + n2
+    denom <- (4 * df.sum) - 9
+    out <- 1 - (3 / denom)
+  } else out <- numer / denom
+
+  return(out)
+}
+
+
+
+#' Compute the standardizers for Cohen's d
+#' @param x1 A vector of values.
+#'
+#' @param x2 Another vector of values.
+#'
+#' @return Named list for pooled and average standardizers.
+#' Use the pooled one for unpaired Cohens' d, and the average one
+#' for paired Cohens'd d.
+#'
+cohen_d_standardizers <- function(x1, x2) {
+  control_n    <- length(x1)
+  test_n       <- length(x2)
+
+  control_mean <- mean(x1)
+  test_mean    <- mean(x2)
+
+  control_var  <- var(x1) # by default, use N-1 to compute the variance.
+  test_var     <- var(x2)
+
+  control_std  <- sd(x1)
+  test_std     <- sd(x2)
+
+  # For unpaired 2-groups standardized mean difference.
+  pooled <- sqrt(
+    ((control_n - 1) * control_var + (test_n - 1) * test_var) /
+      (control_n + test_n - 2)
+  )
+
+  # For paired standardized mean difference.
+  average <- sqrt((control_var + test_var) / 2)
+
+  # out <- c(pooled, average)
+  # names(out) <- c("pooled", "average")
+
+  return(list(pooled = pooled, average = average))
+
+}
+
+
+
+
+effsize_boot <- function(data, effsize_func, R = 5000, paired = FALSE) {
+
+  func <- match.fun(effsize_func)
+
+  # Create stata for resampling.
+  s <- c(rep(1, length(data$control)),
+         rep(2, length(data$test)))
+
+
+  # accounts for paired or unpaired.
+  bootboot <- function(d, indicies, paired) {
+    if (identical(paired, FALSE)) {
+      c <- d[indicies[s == 1]]
+      t <- d[indicies[s == 2]]
+    } else {
+      c <- d[indicies,1]
+      t <- d[indicies,2]
+    }
+    return(func(c, t, paired))
+  }
+  if (identical(paired, FALSE)) {
+    b <- boot(
+      c(data$control, data$test),
+      statistic = bootboot,
+      R = R,
+      strata = s,
+      paired = paired)
+  } else {
+    b <- boot(
+      data.frame(data$control, data$test),
+      statistic = bootboot,
+      R = R,
+      paired = paired)
+  }
+
+  return(b)
+
+}
+
+
+# effsize_boot <- function(effsize, data, paired, indices) {
+#   control <- data$control[indices]
+#   test <- data$test[indices]
+#
+#   cd <- effsize(control, test, paired)
+#
+#   return(cd)
+# }
+
+
+
+#' @importFrom boot boot boot.ci
+#' @importFrom dplyr arrange bind_rows filter group_by summarize
+#' @importFrom magrittr %>%
+#' @importFrom rlang quo_name
+#' @importFrom stringr str_interp
+#' @importFrom tibble tibble
+effect_size <- function(.data, ..., effect.size, ci, reps, seed) {
+
+  #### Check object class ####
+  if (class(.data)[1] != "dabest") {
+    stop(paste(
+      "The object you are plotting is not a `dabest` class object. ",
+      "Please check again! ")
+    )
+  }
+
+  # Create handles for easy access to the items in `.data`.
+  raw.data            <-  .data$data
+  idx                 <-  .data$idx
+  all.groups          <-  .data$.all.groups
+  paired              <-  .data$is.paired
+  data.name           <-  .data$.data.name
+  is.paired           <-  .data$is.paired
+
+  plot.groups.sizes   <-  unlist(lapply(idx, length))
+
+  # The variables below should are quosures!
+  x_enquo             <-  .data$x
+  y_enquo             <-  .data$y
+  effect.size_enquo   <-  quo_name(effect.size)
+
+  x_quoname           <-  quo_name(x_enquo)
+  y_quoname           <-  quo_name(y_enquo)
+
+  # effect.size_enquo   <-  rlang::enquo(effect.size)
+  # effect.size_quoname <-  rlang::quo_name(effect.size_enquo)
+
+  id.col_enquo        <-  .data$id.column
+
+  # Parse the effect.size.
+  if (effect.size == "mean_diff") {
+    es = mean_diff_
+    summ_func = mean
+    summ_name = "mean"
+  } else if (effect.size == "median_diff") {
+    es = median_diff_
+    summ_func = median
+    summ_name = "median"
+  } else if (effect.size == "cohens_d") {
+    es = cohens_d_
+    summ_func = mean
+    summ_name = "mean"
+  } else if (effect.size == "hedges_g") {
+    es = hedges_g_
+    summ_func = mean
+    summ_name = "mean"
+  } else if (effect.size == "cliffs_delta") {
+    es = cliffs_delta_
+    summ_func = median
+    summ_name = "median"
+  }
+
+
+
+  #### Loop through each comparison group. ####
+  result <- tibble() # To capture output.
+
+  for (group in idx) {
+
+    # Check the control group (`group[1]`) is in the x-column.
+    if (identical(group[1] %in% raw.data[[x_quoname]], FALSE)) {
+
+      err1 <- str_interp("${group[1]} is not found")
+      err2 <- str_interp("in the ${x_quoname} column.")
+
+      stop(paste(err1, err2))
+    }
+
+    # Patch in v0.2.2.
+    # Note how we have to unquote both the x_enquo, and the group name!
+    ctrl <- raw.data %>% filter(!!x_enquo == !!group[1])
+
+    ctrl <- ctrl[[y_quoname]]
+
+    c <- na.omit(ctrl)
+
+    # If ctrl is length 0, stop!
+    if (length(c) == 0) {
+      stop(
+        str_interp(
+          c("There are zero numeric observations in the group ${group[1]}.")
+        )
+      )
+    }
+
+    # Get test groups (everything else in group), loop through them and compute
+    # the difference between group[1] and each group.
+    # Test groups are the 2nd element of group onwards.
+    test_groups <- group[2: length(group)]
+
+    for (grp in test_groups) {
+
+      # Check if the current group is in the x-column.
+      if (identical(grp %in% raw.data[[x_quoname]], FALSE)) {
+        stop(
+          str_interp(
+            "${grp} is not found in the ${x_quoname} column."
+          )
+        )
+      }
+
+      # Patch in v0.2.2.
+      # Note how we have to unquote both x_enquo, and grp!
+      test <- raw.data %>% filter(!!x_enquo == !!grp)
+      test <- test[[y_quoname]]
+      t <- na.omit(test)
+
+      # If current test group is length 0, stop!
+      if (length(t) == 0) {
+        stop(
+          str_interp(
+            c("There are zero numeric observations in the group ${grp}.")
+          )
+        )
+      }
+
+
+
+      #### Compute bootstrap. ####
+      datalist <- list(control = ctrl, test = test)
+
+      set.seed(seed)
+      boot_result <- effsize_boot(datalist, effsize_func = es,
+                                  R = reps, paired = paired)
+
+      set.seed(NULL)
+
+      # if (identical(paired, FALSE)) {
+      #   diff <- func(t) - func(c)
+      #   # For two.boot, note that the first vector is the test vector.
+      #   boot <- simpleboot::two.boot(t, c, FUN = func, R = reps)
+      #
+      # } else {
+      #   if (length(c) != length(t)) {
+      #     stop("The two groups are not the same size, but paired = TRUE.")
+      #   }
+      #   paired_diff <- t - c
+      #   diff <- func(paired_diff)
+      #   boot <- simpleboot::one.boot(paired_diff, FUN = func, R = reps)
+      # }
+
+
+
+      #### Compute confidence interval. ####
+      # check CI.
+      if (ci < 0 | ci > 100) {
+        err_string <- str_interp(
+          "`ci` must be between 0 and 100, not ${ci}"
+        )
+        stop(err_string)
+      }
+
+      bootci <- boot.ci(boot_result, conf = ci/100, type = c("perc", "bca"))
+
+
+      #### Save pairwise result. ####
+      row <- tibble(
+        # Convert the name of `func` to a string.
+        control_group = group[1],
+        test_group = grp,
+        control_size = length(c),
+        test_size = length(t),
+        effect_size = effect.size,
+        paired = paired,
+        variable = y_quoname,
+        difference = boot_result$t0,
+        ci = ci,
+        bca_ci_low = bootci$bca[4],
+        bca_ci_high = bootci$bca[5],
+        pct_ci_low = bootci$percent[4],
+        pct_ci_high = bootci$percent[5],
+        bootstraps = list(as.vector(boot_result$t)),
+        nboots = length(boot_result$t)
+      )
+      result <- bind_rows(result, row)
+    }
+  }
+
+  # Reset seed.
+  set.seed(NULL)
+
+
+
+  #### Compute summaries. ####
+  summaries <-
+    raw.data %>%
+    filter(!!x_enquo %in% all.groups) %>%
+    group_by(!!x_enquo) %>%
+    summarize(func_quoname = summ_func(!!y_enquo))
+
+  colnames(summaries) <- c(x_quoname, summ_name)
+
+  # Order the summaries by the idx.
+  summaries[[x_quoname]] <-
+    summaries[[x_quoname]] %>%
+    factor(all.groups, ordered = TRUE)
+
+  summaries <- summaries %>% arrange(!!x_enquo)
+
+
+
+  #### Assemble only the data used to create the plot. ####
+  data.out <- raw.data
+
+  # New in v0.2.1 patch.
+  # Basically, the `ellipsis` package has been updated,
+  # and now forcats::as_factor() should only take the object to coerce.
+  data.out[[x_quoname]] <- forcats::as_factor(data.out[[x_quoname]])
+
+  data.out <- filter(data.out, !!x_enquo %in% all.groups)
+
+
+
+  #### Collate output. ####
+  out = list(
+    data = data.out,
+    x = x_enquo,
+    y = y_enquo,
+    idx = idx,
+    id.column = id.col_enquo,
+    is.paired = is.paired,
+    effect.size = effect.size,
+    .data.name = data.name,
+    result = result,
+    summary = summaries
+  )
+
+
+
+  #### Append the custom class `dabest_effsize`. ####
+  class(out) <- c("dabest_effsize", "list")
+
+
+
+  #### Return the output. ####
+  return(out)
+}
+
+
+
+#' Print a `dabest_effsize` object
+#'
+#' @param x A \code{dabest_effsize} object, generated by one of the
+#'   \link[=mean_diff]{effect size computation} functions.
+#'
+#' @param ... S3 signature for generic plot function.
+#'
+#' @param signif_digits Integer, default 3. All numeric figures in the printed
+#'   output will be rounded off to this number of significant digits.
+#'
+#'
+#' @return A summary of the effect sizes and respective confidence intervals.
+#'
+#' @examples
+#' # Performing unpaired (two independent groups) analysis.
+#' unpaired_mean_diff <- dabest(iris, Species, Petal.Width,
+#'                              idx = c("setosa", "versicolor"),
+#'                              paired = FALSE) %>%
+#'                       mean_diff()
+#'
+#' # Display the results in a user-friendly format.
+#' print(unpaired_mean_diff)
+#'
+#' @export
+#' @importFrom rlang quo_name
+#' @importFrom stringr str_interp
+print.dabest_effsize <- function(x, ..., signif_digits = 3) {
+
+  #### Check object class ####
+  if (class(x)[1] != "dabest_effsize") {
+    stop(paste(
+      "The object you are plotting is not a `dabest_effsize` class object. ",
+      "Please check again! "))
+  } else {
+    dabest.effsize <- x
+  }
+
+  #### Get results table and y var. ####
+  tbl <- dabest.effsize$result
+  var <- quo_name(dabest.effsize$y)
+
+  #### Print greeting header. ####
+  print_greeting_header()
+  # dabest_ver <- utils::packageVersion("dabestr")
+  # header     <- stringr::str_interp(
+  #   "DABEST (Data Analysis with Bootstrap Estimation) v${dabest_ver}\n")
+  # cat(header)
+  #
+  # cat(rep('=', nchar(header) - 1), sep='')
+  # cat("\n\n")
+
+  # cat(stringr::str_interp("Variable: ${var} \n\n"))
+
+  #### Print dataset name, xvar, and yvar. ####
+  xvar = quo_name(dabest.effsize$x)
+  yvar = quo_name(dabest.effsize$y)
+
+  cat(str_interp("Dataset    :  ${dabest.effsize$.data.name}\n"))
+  cat(str_interp("X Variable :  ${xvar}\n"))
+  cat(str_interp("Y Variable :  ${yvar}\n\n"))
+
+  #### Print each row. ####
+  cat(apply(tbl, 1, printrow_, sigdig = signif_digits),
+      sep = "\n")
+
+  #### Endnote about BCa. ####
+  cat(str_interp("${tbl$nboots[1]} bootstrap resamples.\n"))
+  cat("All confidence intervals are bias-corrected and accelerated.\n\n")
+
+}
+
+
+
+#' @importFrom stringr str_interp
+printrow_ <- function(my.row, sigdig = 3) {
+  if (identical(my.row$paired, TRUE)) p <- "Paired" else p <- "Unpaired"
+
+  # For labelling the y-axis.
+  effsizer <- list("mean difference", "median difference",
+                   "Cohen's d", "Hedges' g", "Cliff's delta")
+  names(effsizer) <- c("mean_diff", "median_diff",
+                       "cohens_d", "hedges_g", "cliffs_delta")
+
+  line1 <- str_interp(
+    c(
+      "${p} ${effsizer[[my.row$effect_size]]} of ",
+      "${my.row$test_group} ",
+      "(n = ${my.row$test_size}) ",
+      "minus ${my.row$control_group} ",
+      "(n = ${my.row$control_size})\n"
+    )
+  )
+
+
+  line2 <- str_interp(
+    c("${signif(my.row$difference, sigdig)} ",
+      "[${signif(my.row$ci, sigdig)}CI  ",
+      "${signif(my.row$bca_ci_low, sigdig)}; ",
+      "${signif(my.row$bca_ci_high, sigdig)}]\n\n")
+  )
+
+  cat(line1, line2)
+}
+
+
+