Skip to content

Commit

Permalink
UPD: R package extract funs
Browse files Browse the repository at this point in the history
  • Loading branch information
Craig44 committed Mar 17, 2022
1 parent e269c2f commit 66ec6e7
Show file tree
Hide file tree
Showing 9 changed files with 352 additions and 124 deletions.
Binary file modified R-libraries/Casal2_22.03.tar.gz
Binary file not shown.
2 changes: 1 addition & 1 deletion R-libraries/casal2/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: Casal2
Title: Casal2 extract package
Version: 22.03
Date: 2022-03-02
Date: 2022-03-15
Author: Casal2 Development Team
Description: A set of R functions for extracting and plotting from Casal2 configuration input files, reports, and other associated files.
Maintainer: Casal2 Development Team <[email protected]>
Expand Down
197 changes: 197 additions & 0 deletions R-libraries/casal2/R/ShortHandFunctions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@

#' @title expand_category_block
#'
#' @description
#' A utility function for expanding short hand syntax in @category blocks in casal2 config files
#'
#' @author Craig Marsh
#' @param categories string of categories
#' @return a vector of strings
#' @keywords internal
#' @examples
#' \dontrun{
#' expand_category_block("stock")
#' expand_category_block("stock.male,female")
#' expand_category_block("stock.male,female.untagged,1990")
#' }
expand_category_block <- function(categories) {
expanded_labels = vector();
groups = strsplit(categories, split = ".", fixed = TRUE)[[1]]
category_list = list()
for(i in 1:length(groups)) {
if(grepl(groups[i], pattern = ",")) {
sub_groups = strsplit(groups[i], split = ",", fixed = TRUE)[[1]]
category_list[[i]] = sub_groups
} else {
category_list[[i]] = groups[i]
}
}
## now permutate them
all_perms <- expand.grid(category_list, stringsAsFactors = FALSE)
new_perms = NULL
## need to re-order based on first factor
i = 1
for(j in 1:length(category_list[[i]])) {
ndx = all_perms$Var1 == category_list[[i]][j]
new_perms = rbind(new_perms, all_perms[ndx,])
}
for(i in 1:nrow(new_perms)) {
expanded_labels = c(expanded_labels, paste0(new_perms[i,], collapse = "."))
}
return(expanded_labels)
}

#' @title expand_category_shorthand
#'
#' @description
#' A utility function for expanding short hand syntax of categories used in subcommands throught casal2 config files
#'
#' @author Craig Marsh
#' @param shorthand_categories shorthand values to expand
#' @param reference_categories string of categories expanded from the @category block of a config. derived from expand_category_block
#' @param category_format the format defined in the @categories block
#' @return a vector of strings
#' @keywords internal
#' @examples
#' \dontrun{
#' expand_category_shorthand(shorthand_categories ="format=*.EN.*", reference_categories = c("BOP.EN.male"))
#' expand_category_shorthand(shorthand_categories ="format=*.EN.*", reference_categories = c("BOP.EN.male", "BOP.EN.female"))
#' expand_category_shorthand(shorthand_categories ="format=*.EN.*", reference_categories = c("BOP.EN.male", "BOP.EN.female", "HAGU.EN.male", "HAGU.EN.female"))
#' expand_category_shorthand(shorthand_categories ="format = *.EN.+", reference_categories = c("BOP.EN.male", "BOP.EN.female"))
#' expand_category_shorthand(shorthand_categories ="format = *.EN.+", reference_categories = c("BOP.EN.male", "BOP.EN.female", "HAGU.EN.male", "HAGU.EN.female"))
#' expand_category_shorthand(shorthand_categories ="*", reference_categories = c("BOP.EN.male", "BOP.EN.female", "HAGU.EN.male", "HAGU.EN.female"))
#' expand_category_shorthand(shorthand_categories ="stock=BOP", reference_categories = c("BOP.EN.male", "BOP.EN.female", "HAGU.EN.male", "HAGU.EN.female"), category_format = "stock.area.sex")
#' }
expand_category_shorthand <- function(shorthand_categories, reference_categories, category_format = NULL) {
expanded_categories = vector();
formats = NULL
if(!is.null(category_format)) {
formats = strsplit(category_format, ".", fixed = T)[[1]]
}
broken_ref_vals = Reduce(rbind, strsplit(reference_categories, ".", fixed = T))
if(is.null(dim(broken_ref_vals))) {
broken_ref_vals = matrix(broken_ref_vals, nrow =1)
}
# strip whitespace
shorthand_categories_no_space = gsub(" ", "", shorthand_categories, fixed = TRUE)
expanded_categories = shorthand_categories_no_space
# if format strip that out
format_found = grepl(shorthand_categories_no_space, pattern = "format=", fixed = TRUE)
if(format_found) {
shorthand_categories_no_space = gsub("format=", "", shorthand_categories_no_space, fixed = TRUE)
}
# if other shorthand
format_found = grepl(shorthand_categories_no_space, pattern = "=", fixed = TRUE)
if(format_found) {
expanded_categories = NULL
temp_formats = strsplit(shorthand_categories_no_space, "=", fixed = T)[[1]]
cat_ndx = which(formats == temp_formats[1])
ref_ndx = broken_ref_vals[,cat_ndx] == temp_formats[2]
these_categories = broken_ref_vals[ref_ndx, ]
for(i in 1:nrow(these_categories)) {
expanded_categories = c(expanded_categories, paste0(these_categories[i,], collapse = "."))
}
}


## now should be only category labels, periods ('.'), astrix, or +
rep_syntax = grepl(shorthand_categories_no_space, pattern = "*", fixed = TRUE)
category_list = list();
if(rep_syntax) {
if(shorthand_categories_no_space == "*") {
# return all categories
expanded_categories = reference_categories
} else {
## need to repeat some category levels
broken_vals = strsplit(shorthand_categories_no_space, ".", fixed = T)[[1]]
for(i in 1:length(broken_vals)) {
if(broken_vals[i] == "*") {
category_list[[i]] = unique(broken_ref_vals[,i])
} else {
category_list[[i]] = broken_vals[i]
}
}
## now permutate them
all_perms <- expand.grid(category_list, stringsAsFactors = FALSE)
for(i in 1:nrow(all_perms)) {
expanded_categories = c(expanded_categories, paste0(all_perms[i,], collapse = "."))
}
}
}
return(expanded_categories)
}

#' @title expand_shorthand_syntax
#'
#' @description
#' A utility function for expanding short hand syntax for based on - format=*.EN.*, label*5, and + syntax
#'
#' @author Craig Marsh
#' @param syntax string of the syntax to expand
#' @return a vector of strings
#' @keywords internal
#' @examples
#' \dontrun{
#' # currently can handle these shorthand
#' expand_shorthand_syntax(syntax = "age_length")
#' expand_shorthand_syntax(syntax = "age_length * 8")
#' expand_shorthand_syntax(syntax ="age_length*8")
#' expand_shorthand_syntax(syntax ="age_length_BP*4 age_length_EN*4")
#' expand_shorthand_syntax(syntax = "1990:2000")
#' expand_shorthand_syntax(syntax ="age_length_BP*4,age_length_EN*4")
#' expand_shorthand_syntax(syntax ="0.75*3")
#' expand_shorthand_syntax(syntax ="0.75 * 3 ")
#' # cant deal with
#' expand_shorthand_syntax(syntax ="0.75*3 0.75*3")
#' }
expand_shorthand_syntax <- function(syntax) {
syntax = paste(syntax, collapse = "")
## strip whitespace out of syntax to make it easier
syntax_no_space = gsub(" ", "", syntax, fixed = TRUE)
# we wont deal with ',' so strip them out to
syntax_no_space = gsub(",", "", syntax_no_space, fixed = TRUE)

rep_syntax = grepl(syntax_no_space, pattern = "*", fixed = TRUE)
colon_syntax = grepl(syntax_no_space, pattern = ":", fixed = TRUE)

## deal with repeat labels
repeated_values = vector();
if(rep_syntax) {
## simple expansion
rep_components = strsplit(syntax_no_space, split = "*", fixed = TRUE)[[1]]
convert_vals = suppressWarnings(as.numeric(rep_components))
if(all(!is.na(convert_vals))) {
## all numbers so must be 0.45 * 2
# pairs = length(rep_components) / 2
if(length(rep_components) != 2)
stop("unknown syntax")
repeated_values = rep(convert_vals[1], convert_vals[2])
} else {
# Numbers and words
for(i in 1:length(rep_components)) {
# no numbers
if(gsub("[^\\d]+", "", rep_components[i], perl=TRUE) == "") {
## simple case "label"
value = rep_components[i]
} else if(gsub("[^\\d]+", "", rep_components[i], perl=TRUE) != "" & (nchar(gsub("[^\\d]+", "", rep_components[i], perl=TRUE)) != nchar(rep_components[i]))) {
## has numbers and words...
## get number
rep_num = as.numeric(gsub("[^\\d]+", "", rep_components[i], perl=TRUE))
repeated_values = c(repeated_values, rep(value, rep_num))
## get the other word
value = gsub('[[:digit:]]+', '', rep_components[i])
} else {
## just number repeat value by this number
repeated_values = c(repeated_values, rep(value, as.numeric(gsub("[^\\d]+", "", rep_components[i], perl=TRUE))))
}
}
}
} else if(colon_syntax) {
## assumes only numbers lowerval:upperval
seq_components = strsplit(syntax_no_space, split = ":", fixed = TRUE)[[1]]
repeated_values = as.numeric(seq_components[1]):as.numeric(seq_components[2])
} else {
repeated_values = syntax
}
return(repeated_values)
}
21 changes: 18 additions & 3 deletions R-libraries/casal2/R/extract.csl2.file.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@
## this will be the default label if no user defined label is specified
for (i in 1:length(file)) {
temp <- string.to.vector.of.words(file[i])
check_inputs <- check_short_hand(temp)
## expand numeric shorthand
#if (any(check_inputs$numeric) && (substr(Command, 1, 3) != "est") | (substr(Command, 1, 3) != "par")) {
#vals <- eval(parse(text = temp[check_inputs$numeric]))
Expand Down Expand Up @@ -101,8 +100,24 @@
}
## Check if it is a valid type/subcommand
if (type == "vector") {
## deal with a vector subcommand
ans[[Command]][[temp[1]]] <- list("value" = temp[-1])
new_string = temp[1]
## if category or parameter subcommand skip expand short hand for * and :
if(grepl(temp[1], pattern = "categor", fixed = TRUE) | grepl(temp[1], pattern = "parameter", fixed = TRUE)) {
new_string = temp
} else {
## check white space hasn't split it up already
second_values = NULL
if(any(temp[2:length(temp)] == "*")) {
second_values = paste(temp[2:length(temp)], collapse = "")
} else {
second_values = temp[2:length(temp)]
}
## deal with : shorthand
for (j in 1:length(second_values)) {
new_string <- c(new_string, expand_shorthand_syntax(second_values[j]))
}
}
ans[[Command]][[new_string[1]]] <- list("value" = new_string[-1])
} else if ((type == "table_label") || in_table) {
## deal with a table input. the biggest pain in ithe ass
in_table <- TRUE
Expand Down
Loading

0 comments on commit 66ec6e7

Please sign in to comment.