Skip to content

Commit

Permalink
Merge pull request #78 from krassowski/add_intersect_mode
Browse files Browse the repository at this point in the history
Make all three modes: distinct, intersect and union available
  • Loading branch information
krassowski authored Dec 20, 2020
2 parents 8c6750b + e71d56c commit 19a6f29
Show file tree
Hide file tree
Showing 77 changed files with 5,880 additions and 4,363 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ movies.png
requirements.txt
run_tests.sh
^data$
^.github$
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: ComplexUpset
Type: Package
Title: Create Complex UpSet Plots Using 'ggplot2' Components
Version: 0.8.1
Version: 0.9.0
Authors@R: person(
"Michał", "Krassowski", email = "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-9638-7785"))
Expand All @@ -26,7 +26,8 @@ Suggests:
Imports:
ggplot2,
patchwork,
scales
scales,
colorspace
VignetteBuilder: knitr
RoxygenNote: 7.1.1
Roxygen: list(markdown = TRUE)
22 changes: 22 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
# Generated by roxygen2: do not edit by hand

export(aes_percentage)
export(arrange_venn)
export(compare_between_intersections)
export(create_upset_abc_example)
export(geom_venn_circle)
export(geom_venn_label_region)
export(geom_venn_label_set)
export(geom_venn_region)
export(get_size_mode)
export(intersection_matrix)
export(intersection_ratio)
export(intersection_size)
export(reverse_log_trans)
export(scale_color_venn_mix)
export(scale_fill_venn_mix)
export(upset)
export(upset_annotate)
export(upset_data)
Expand All @@ -16,27 +25,40 @@ export(upset_set_size)
export(upset_test)
export(upset_text_percentage)
export(upset_themes)
importFrom(colorspace,RGB)
importFrom(colorspace,mixcolor)
importFrom(ggplot2,"%+%")
importFrom(ggplot2,aes)
importFrom(ggplot2,aes_)
importFrom(ggplot2,aes_string)
importFrom(ggplot2,coord_flip)
importFrom(ggplot2,element_blank)
importFrom(ggplot2,expr)
importFrom(ggplot2,geom_bar)
importFrom(ggplot2,geom_col)
importFrom(ggplot2,geom_label)
importFrom(ggplot2,geom_point)
importFrom(ggplot2,geom_polygon)
importFrom(ggplot2,geom_segment)
importFrom(ggplot2,geom_text)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,is.ggplot)
importFrom(ggplot2,layer)
importFrom(ggplot2,position_stack)
importFrom(ggplot2,scale_color_manual)
importFrom(ggplot2,scale_fill_manual)
importFrom(ggplot2,scale_x_discrete)
importFrom(ggplot2,scale_y_continuous)
importFrom(ggplot2,scale_y_discrete)
importFrom(ggplot2,scale_y_reverse)
importFrom(ggplot2,stat_summary)
importFrom(ggplot2,sym)
importFrom(ggplot2,theme)
importFrom(ggplot2,theme_minimal)
importFrom(ggplot2,xlab)
importFrom(ggplot2,ylab)
importFrom(grDevices,col2rgb)
importFrom(grDevices,rgb)
importFrom(patchwork,guide_area)
importFrom(patchwork,plot_layout)
importFrom(patchwork,plot_spacer)
Expand Down
12 changes: 10 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
# Version 0.8.1
# Version 0.9.0

*Under development*
2020-12-20

New features:
- Intersection modes were formalized with the default remaining `exclusive_intersection` (alias `distinct`); additional modes are: `inclusive_intersection` (alias `intersect`), `inclusive_union` and `exclusive_union`; please read the relevant part of the documentation for details (#78).
- Simple Venn diagrams (for two or three sets) can now be constructed from the same input (a binary presence data frame) using pseudo geoms: `geom_venn_circle()`, `geom_venn_label_region()`, `geom_venn_label_set()`, `geom_venn_region()` and scales `scale_color_venn_mix()` and `scale_fill_venn_mix()`; while developed mostly for the needs of the documentation, this provides the unique capability of highlighting relevant regions of the Venn diagram and placing observations within the appropriate regions (which makes it possible to demonstrate their attributes with an appropriate aesthetics mapping).

Changes:
- Breaking: union size for "empty" intersection is now equal to its size

Bug fixes:
- Layers added to `upset_set_size()` and `intersection_matrix()` will now always go on top (avoiding geoms being hidden underneath)
- Declare layer in NAMESPACE to allow basic usage without loading `ggplot2`
- `upset_query()` will now throw an informative error when the user forgets to pass any aesthetics (#79)

# Version 0.8.0

Expand Down
179 changes: 164 additions & 15 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ compute_matrix = function(sorted_intersections, sorted_groups) {
}


compute_unions = function(data, sorted_intersections) {
compute_mode_inclusive_unions = function(data, sorted_intersections) {
intersections_as_groups = get_intersection_members(sorted_intersections)

result = sapply(
Expand All @@ -98,6 +98,54 @@ compute_unions = function(data, sorted_intersections) {
}




# Count, for each intersection, the observations belonging to its exclusive union,
# i.e. observations which are not a member of any set outside of that intersection.
#
# data: data frame with `id` and `intersection` columns; only the first row
#       of each `id` is taken into account
# sorted_intersections: names of the intersections; used to name the result
#
# Returns an integer vector of counts, named by `sorted_intersections`.
compute_mode_exclusive_unions = function(data, sorted_intersections) {
    # presumably a list with one vector of set names per intersection
    # (get_intersection_members is defined elsewhere) - TODO confirm
    intersections_as_groups = get_intersection_members(sorted_intersections)

    # one entry per observation: the sets to which the observation belongs
    members = get_intersection_members(data[!duplicated(data$id), 'intersection'])

    # vapply (rather than sapply) guarantees the result type also for empty
    # inputs; sapply would return list(), which sum() cannot handle
    result = vapply(
        intersections_as_groups,
        function(i_groups) {
            is_in_exclusive_union = vapply(
                members,
                function(i_members) {
                    # the observation has no membership outside of the intersection's sets
                    length(setdiff(i_members, i_groups)) == 0
                },
                logical(1)
            )

            sum(is_in_exclusive_union)
        },
        integer(1)
    )
    names(result) = sorted_intersections
    result
}


# Inclusive intersection: count, for each intersection, the observations which
# are members of all the sets of that intersection (they may belong to other
# sets as well).
#
# data: data frame with `id` and `intersection` columns; only the first row
#       of each `id` is taken into account
# sorted_intersections: names of the intersections; used to name the result
#
# Returns an integer vector of counts, named by `sorted_intersections`.
compute_mode_intersect = function(data, sorted_intersections) {
    # presumably a list with one vector of set names per intersection
    # (get_intersection_members is defined elsewhere) - TODO confirm
    intersections_as_groups = get_intersection_members(sorted_intersections)

    # one entry per observation: the sets to which the observation belongs
    members = get_intersection_members(data[!duplicated(data$id), 'intersection'])

    # vapply (rather than sapply) guarantees the result type also for empty
    # inputs; sapply would return list(), which sum() cannot handle
    result = vapply(
        intersections_as_groups,
        function(i_groups) {
            is_intersect = vapply(
                members,
                function(i_members) {
                    # every set of the intersection is among the observation's sets;
                    # equivalent to setequal(intersect(i_members, i_groups), i_groups)
                    all(i_groups %in% i_members)
                },
                logical(1)
            )

            sum(is_intersect)
        },
        integer(1)
    )
    names(result) = sorted_intersections
    result
}


check_argument = function(
value,
allowed,
Expand Down Expand Up @@ -204,8 +252,10 @@ trim_intersections = function(
#' @param sort_intersections_by the mode of sorting, the size of the intersection (cardinality) by default; one of: `'cardinality'`, `'degree'`, `'ratio'`, or any combination of these (e.g. `c('degree', 'cardinality')`)
#' @param group_by the mode of grouping intersections; one of: `'degree'`, `'sets'`
#' @param min_max_early whether the min and max limits should be applied early (for faster plotting), or late (for accurate depiction of ratios)
#' @param union_count_column name of the column to store the union size (adjust if conflicts with your data)
#' @param intersection_count_column name of the column to store the intersection size (adjust if conflicts with your data)
#' @param inclusive_union_count_column name of the column to store the inclusive union size (adjust if conflicts with your data)
#' @param exclusive_union_count_column name of the column to store the exclusive union size (adjust if conflicts with your data)
#' @param inclusive_intersection_column name of the column to store the inclusive intersection size (adjust if conflicts with your data)
#' @param exclusive_intersection_column name of the column to store the exclusive intersection size (adjust if conflicts with your data)
#' @export
upset_data = function(
data, intersect, min_size=0, max_size=Inf, min_degree=0, max_degree=Inf,
Expand All @@ -218,8 +268,10 @@ upset_data = function(
sort_intersections_by='cardinality',
group_by='degree',
min_max_early=TRUE,
union_count_column='union_size',
intersection_count_column='intersection_size'
inclusive_intersection_column='size_intersect_mode',
exclusive_intersection_column='size_distinct_mode',
inclusive_union_count_column='size_inclusive_union_mode',
exclusive_union_count_column='size_exclusive_union_mode'
) {
if ('tbl' %in% class(data)) {
data = as.data.frame(data)
Expand Down Expand Up @@ -327,13 +379,16 @@ upset_data = function(
stacked$id = rep(1:nrow(data), length(intersect))
stacked = stacked[stacked$values == TRUE, ]

# Note: we do want to include the additional attributes as those provide info for filling set sizes
metadata = data[
match(
stacked$id,
1:nrow(data)
),
setdiff(colnames(data), intersect)
match(
stacked$id,
1:nrow(data)
),
setdiff(colnames(data), intersect),
drop=FALSE
]

stacked = cbind(stacked, metadata)

stacked$group = stacked$ind
Expand All @@ -358,7 +413,7 @@ upset_data = function(
sort_value = calculate_degree(original_intersections_names)
names(sort_value) = original_intersections_names
} else if (by == 'ratio') {
unsorted_union_sizes = compute_unions(stacked, names(intersections_by_size))
unsorted_union_sizes = compute_mode_inclusive_unions(stacked, names(intersections_by_size))
sort_value = intersections_by_size
sort_value = sort_value / unsorted_union_sizes
}
Expand Down Expand Up @@ -447,12 +502,40 @@ upset_data = function(
)
}

union_sizes = compute_unions(stacked, sorted_intersections)
# "stacked" does not contain the empty intersection, so those need to be added manually!
empty_observations = data$intersection[data$intersection == EMPTY_INTERSECTION]

if (length(empty_observations) != 0) {
highest_non_empty_id = max(stacked$id)
stack_for_empty = data.frame(
values=TRUE,
ind=empty_observations,
id=(highest_non_empty_id + 1):(highest_non_empty_id + length(empty_observations)),
intersection=empty_observations,
group=empty_observations
)

data_for_size_calculation = rbind(
stacked[, colnames(stack_for_empty)],
stack_for_empty
)
} else {
data_for_size_calculation = stacked
}

inclusive_union_sizes = compute_mode_inclusive_unions(data_for_size_calculation, sorted_intersections)
exclusive_union_sizes = compute_mode_exclusive_unions(data_for_size_calculation, sorted_intersections)

intersect_mode_sizes = compute_mode_intersect(data_for_size_calculation, sorted_intersections)

with_sizes = data.frame(data)

with_sizes[[union_count_column]] = union_sizes[data$intersection]
with_sizes[[intersection_count_column]] = intersections_by_size[data$intersection]
with_sizes[[inclusive_union_count_column]] = inclusive_union_sizes[data$intersection]
with_sizes[[exclusive_union_count_column]] = exclusive_union_sizes[data$intersection]

with_sizes[[exclusive_intersection_column]] = as.numeric(intersections_by_size[data$intersection])
with_sizes[[inclusive_intersection_column]] = intersect_mode_sizes[data$intersection]


list(
with_sizes=with_sizes,
Expand All @@ -465,9 +548,75 @@ upset_data = function(
groups=sorted_groups,
intersections=sorted_intersections
),
union_sizes=union_sizes,
union_sizes=inclusive_union_sizes,
intersect_mode_sizes=intersect_mode_sizes,
plot_intersections_subset=plot_intersections_subset,
plot_sets_subset=plot_sets_subset,
non_sanitized_labels=non_sanitized_labels
)
}

#' Create an example dataset with three sets: A, B and C
#'
#' The rows are ordered by region: 1) 100 in A only, 2) 100 in B only,
#' 3) 1000 in C only, 4) 10 in A-B only, 5) 6 in A-C only, 6) 6 in B-C only,
#' 7) 1 in A-B-C, 8) 2 in neither of the sets.
#'
#' @return a data frame with 1225 rows and three logical columns (`A`, `B`, `C`) indicating the membership of each observation in the respective set
#' @export
create_upset_abc_example = function() {
    # number of observations in each region, in the order:
    # A only, B only, C only, A-B only, A-C only, B-C only, A-B-C, neither
    region_sizes = c(100, 100, 1000, 10, 6, 6, 1, 2)

    # membership of each region in each set; TRUE/FALSE are spelled out because
    # the T/F shorthands are ordinary variables which can be reassigned
    data.frame(
        A = rep(
            c(TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, TRUE, FALSE),
            times=region_sizes
        ),
        B = rep(
            c(FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, FALSE),
            times=region_sizes
        ),
        C = rep(
            c(FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE),
            times=region_sizes
        )
    )
}
Loading

0 comments on commit 19a6f29

Please sign in to comment.