Skip to content

Commit

Permalink
Added sample-agnostic workflow functions
Browse files Browse the repository at this point in the history
  • Loading branch information
browaeysrobin committed Mar 12, 2024
1 parent dd51a5d commit da03c09
Show file tree
Hide file tree
Showing 8 changed files with 954 additions and 61 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export(get_abundance_info)
export(get_avg_pb_exprs)
export(get_empirical_pvals)
export(get_frac_exprs)
export(get_frac_exprs_sampleAgnostic)
export(get_ligand_activities_targets_DEgenes)
export(get_ligand_activities_targets_DEgenes_beta)
export(get_muscat_exprs_avg)
Expand Down
88 changes: 88 additions & 0 deletions R/expression_processing.R
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,94 @@ get_frac_exprs = function(sce, sample_id, celltype_id, group_id, batches = NA, m
return(list(frq_df = frq_df, frq_df_group = frq_df_group, expressed_df = expressed_df))

}
#' @title get_frac_exprs_sampleAgnostic
#'
#' @description \code{get_frac_exprs_sampleAgnostic} Calculate the average fraction of expression of each gene per group. All cells from all samples will be pooled per group/condition.
#' @usage get_frac_exprs_sampleAgnostic(sce, sample_id, celltype_id, group_id, batches = NA, min_cells = 10, fraction_cutoff = 0.05, min_sample_prop = 0.5)
#'
#' @inheritParams get_frac_exprs
#' @param min_sample_prop Default, and only recommended value = 1. Hereby, the gene should be expressed in at least one group/condition.
#'
#' @return List containing data frames with the fraction of expression per sample and per group.
#'
#' @import dplyr
#' @import tibble
#' @importFrom tidyr gather
#' @importFrom SummarizedExperiment colData
#'
#' @examples
#' \dontrun{
#' library(dplyr)
#' sample_id = "tumor"
#' group_id = "pEMT"
#' celltype_id = "celltype"
#' frac_info = get_frac_exprs_sampleAgnostic(sce = sce, sample_id = sample_id, celltype_id = celltype_id, group_id = group_id)
#' }
#'
#' @export
#'
get_frac_exprs_sampleAgnostic = function(sce, sample_id, celltype_id, group_id, batches = NA,
min_cells = 10, fraction_cutoff = 0.05, min_sample_prop = 1){

sample_id = group_id
frq_df = get_muscat_exprs_frac(sce, sample_id = sample_id,
celltype_id = celltype_id, group_id = group_id) %>% .$frq_celltype_samples
metadata = SummarizedExperiment::colData(sce) %>% tibble::as_tibble()
metadata_abundance = metadata %>% dplyr::select(group_id, celltype_id)
print(head(metadata_abundance))
metadata_abundance = cbind(metadata[, group_id], metadata_abundance)
print(head(metadata_abundance))
colnames(metadata_abundance) = c("sample", "group", "celltype")
print(head(metadata_abundance))
metadata_abundance = metadata_abundance %>% tibble::as_tibble()
print(head(metadata_abundance))

abundance_data = metadata_abundance %>% tibble::as_tibble() %>%
dplyr::group_by(sample, celltype) %>% dplyr::count() %>%
dplyr::inner_join(metadata_abundance %>% tibble::as_tibble() %>%
dplyr::distinct(sample, group), by = "sample")
abundance_data = abundance_data %>% dplyr::mutate(keep_sample = n >=
min_cells) %>% dplyr::mutate(keep_sample = factor(keep_sample,
levels = c(TRUE, FALSE)))
grouping_df = abundance_data[,c("sample","group")] %>%
tibble::as_tibble() %>% dplyr::distinct()
grouping_df_filtered = grouping_df %>% inner_join(abundance_data) %>%
filter(keep_sample == TRUE)
print(paste0("Groups are considered if they have more than ",
min_cells, " cells of the cell type of interest"))
frq_df_group = frq_df %>% dplyr::inner_join(grouping_df_filtered) %>%
dplyr::group_by(group, celltype, gene) %>% dplyr::summarise(fraction_group = mean(fraction_sample))
n_smallest_group_tbl = grouping_df_filtered %>% dplyr::group_by(group,
celltype) %>% dplyr::count() %>% dplyr::group_by(celltype) %>%
dplyr::summarize(n_smallest_group = min(n)) %>% dplyr::mutate(n_min = min_sample_prop *
n_smallest_group) %>% distinct()
print(paste0("Genes with non-zero counts in at least ", fraction_cutoff *
100, "% of cells of a cell type of interest in a particular group/condition will be considered as expressed in that group/condition"))
for (i in seq(length(unique(n_smallest_group_tbl$celltype)))) {
celltype_oi = unique(n_smallest_group_tbl$celltype)[i]
n_min = n_smallest_group_tbl %>% filter(celltype == celltype_oi) %>%
pull(n_min)
print(paste0("Genes expressed in at least ", n_min, " group will considered as expressed in the cell type: ",
celltype_oi))
}
frq_df = frq_df %>% dplyr::inner_join(grouping_df) %>% dplyr::mutate(expressed_sample = fraction_sample >=
fraction_cutoff)
expressed_df = frq_df %>% inner_join(n_smallest_group_tbl) %>%
inner_join(abundance_data) %>% dplyr::group_by(gene,
celltype) %>% dplyr::summarise(n_expressed = sum(expressed_sample)) %>%
dplyr::mutate(expressed = n_expressed >= n_min) %>% distinct(celltype,
gene, expressed)
for (i in seq(length(unique(expressed_df$celltype)))) {
celltype_oi = unique(expressed_df$celltype)[i]
n_genes = expressed_df %>% filter(celltype == celltype_oi) %>%
filter(expressed == TRUE) %>% pull(gene) %>% unique() %>%
length()
print(paste0(n_genes, " genes are considered as expressed in the cell type: ",
celltype_oi))
}
return(list(frq_df = frq_df, frq_df_group = frq_df_group,
expressed_df = expressed_df))
}
#' @title process_info_to_ic
#'
#' @description \code{process_info_to_ic} Process cell type expression information into intercellular communication focused information. Only keep information of ligands for the sender cell type setting, and information of receptors for the receiver cell type.
Expand Down
85 changes: 27 additions & 58 deletions R/pipeline.R
Original file line number Diff line number Diff line change
Expand Up @@ -643,7 +643,7 @@ multi_nichenet_analysis = function(sce,
#' covariates = NA
#' contrasts_oi = c("'High-Low','Low-High'")
#' contrast_tbl = tibble(contrast = c("High-Low","Low-High"), group = c("High","Low"))
#' output = multi_nichenet_analysis(
#' output = multi_nichenet_analysis_sampleAgnostic(
#' sce = sce,
#' celltype_id = celltype_id,
#' sample_id = sample_id,
Expand Down Expand Up @@ -919,12 +919,15 @@ multi_nichenet_analysis_sampleAgnostic = function(sce,
# sce = sce[, SummarizedExperiment::colData(sce)[,group_id] %in% contrast_tbl$group] # keep only considered groups
# do not do this -- this could give errors if only interested in one contrast but multiple groups

sce = scuttle::logNormCounts(sce)


if(verbose == TRUE){
print("Make diagnostic abundance plots + define expressed genes")
}
## check abundance info

abundance_info = get_abundance_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, min_cells = min_cells, senders_oi = senders_oi, receivers_oi = receivers_oi, batches = batches)
abundance_info = get_abundance_info(sce = sce, sample_id = group_id, group_id = group_id, celltype_id = celltype_id, min_cells = min_cells, senders_oi = senders_oi, receivers_oi = receivers_oi, batches = batches)

## check for condition-specific cell types
sample_group_celltype_df = abundance_info$abundance_data %>% filter(n > min_cells) %>% ungroup() %>% distinct(sample_id, group_id) %>% cross_join(abundance_info$abundance_data %>% ungroup() %>% distinct(celltype_id)) %>% arrange(sample_id)
Expand All @@ -933,11 +936,11 @@ multi_nichenet_analysis_sampleAgnostic = function(sce,
abundance_df$keep[is.na(abundance_df$keep)] = FALSE
abundance_df_summarized = abundance_df %>% mutate(keep = as.logical(keep)) %>% group_by(group_id, celltype_id) %>% summarise(samples_present = sum((keep)))
celltypes_absent_one_condition = abundance_df_summarized %>% filter(samples_present == 0) %>% pull(celltype_id) %>% unique() # find truly condition-specific cell types by searching for cell types truely absent in at least one condition
celltypes_present_one_condition = abundance_df_summarized %>% filter(samples_present >= 2) %>% pull(celltype_id) %>% unique() # require presence in at least 2 samples of one group so it is really present in at least one condition
celltypes_present_one_condition = abundance_df_summarized %>% filter(samples_present >= 1) %>% pull(celltype_id) %>% unique() # require presence in at least 2 samples of one group so it is really present in at least one condition
condition_specific_celltypes = intersect(celltypes_absent_one_condition, celltypes_present_one_condition)

total_nr_conditions = SummarizedExperiment::colData(sce)[,group_id] %>% unique() %>% length()
absent_celltypes = abundance_df_summarized %>% dplyr::filter(samples_present < 2) %>% dplyr::group_by(celltype_id) %>% dplyr::count() %>% dplyr::filter(n == total_nr_conditions) %>% dplyr::pull(celltype_id)
absent_celltypes = abundance_df_summarized %>% dplyr::filter(samples_present < 1) %>% dplyr::group_by(celltype_id) %>% dplyr::count() %>% dplyr::filter(n == total_nr_conditions) %>% dplyr::pull(celltype_id)

print("condition-specific celltypes:")
print(condition_specific_celltypes)
Expand All @@ -953,49 +956,26 @@ multi_nichenet_analysis_sampleAgnostic = function(sce,
sce = sce[, SummarizedExperiment::colData(sce)[,celltype_id] %in% retained_celltypes]

## define expressed genes
frq_list = get_frac_exprs(sce = sce, sample_id = sample_id, celltype_id = celltype_id, group_id = group_id, batches = batches, min_cells = min_cells, fraction_cutoff = fraction_cutoff, min_sample_prop = min_sample_prop)
frq_list = get_frac_exprs_sampleAgnostic(sce = sce, sample_id = sample_id, celltype_id = celltype_id, group_id = group_id, batches = batches, min_cells = min_cells, fraction_cutoff = fraction_cutoff, min_sample_prop = min_sample_prop)

### Perform the DE analysis ----------------------------------------------------------------

if(verbose == TRUE){
print("Calculate differential expression for all cell types")
}

if(findMarkers == FALSE){
DE_info = get_DE_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, batches = batches, covariates = covariates, contrasts_oi = contrasts_oi, min_cells = min_cells,
assay_oi_pb = assay_oi_pb,
fun_oi_pb = fun_oi_pb,
de_method_oi = de_method_oi,
findMarkers = findMarkers,
expressed_df = frq_list$expressed_df)
} else {
DE_info = get_DE_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, batches = batches, covariates = covariates, contrasts_oi = contrasts_oi, min_cells = min_cells,
DE_info = get_DE_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, batches = batches, covariates = covariates, contrasts_oi = contrasts_oi, min_cells = min_cells,
assay_oi_pb = assay_oi_pb,
fun_oi_pb = fun_oi_pb,
de_method_oi = de_method_oi,
findMarkers = findMarkers,
findMarkers = TRUE,
contrast_tbl = contrast_tbl,
expressed_df = frq_list$expressed_df)
}

if(empirical_pval == TRUE){
if(findMarkers == TRUE){
DE_info_emp = get_empirical_pvals(DE_info$celltype_de_findmarkers)
} else {
DE_info_emp = get_empirical_pvals(DE_info$celltype_de$de_output_tidy)
}
}

if(empirical_pval == FALSE){
if(findMarkers == TRUE){
celltype_de = DE_info$celltype_de_findmarkers
} else {
celltype_de = DE_info$celltype_de$de_output_tidy
}
} else {
celltype_de = DE_info_emp$de_output_tidy_emp %>% dplyr::select(-p_val, -p_adj) %>% dplyr::rename(p_val = p_emp, p_adj = p_adj_emp)
}


celltype_de = DE_info$celltype_de_findmarkers

# print(celltype_de %>% dplyr::group_by(cluster_id, contrast) %>% dplyr::filter(p_adj <= p_val_threshold & abs(logFC) >= logFC_threshold) %>% dplyr::count() %>% dplyr::arrange(-n))

senders_oi = celltype_de$cluster_id %>% unique()
Expand Down Expand Up @@ -1023,23 +1003,28 @@ multi_nichenet_analysis_sampleAgnostic = function(sce,
if(verbose == TRUE){
print("Calculate normalized average and pseudobulk expression")
}
abundance_expression_info = process_abundance_expression_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, min_cells = min_cells, senders_oi = union(senders_oi, condition_specific_celltypes), receivers_oi = union(receivers_oi, condition_specific_celltypes), lr_network = lr_network, batches = batches, frq_list = frq_list, abundance_info = abundance_info)
abundance_expression_info = process_abundance_expression_info(sce = sce, sample_id = group_id, group_id = group_id, celltype_id = celltype_id, min_cells = min_cells, senders_oi = union(senders_oi, condition_specific_celltypes), receivers_oi = union(receivers_oi, condition_specific_celltypes), lr_network = lr_network, batches = batches, frq_list = frq_list, abundance_info = abundance_info)

metadata_combined = SummarizedExperiment::colData(sce) %>% tibble::as_tibble()

if(!is.na(batches)){
grouping_tbl = metadata_combined[,c(sample_id, group_id, batches)] %>% tibble::as_tibble() %>% dplyr::distinct()
colnames(grouping_tbl) = c("sample","group",batches)
grouping_tbl = metadata_combined[,c(group_id, batches)] %>% tibble::as_tibble() %>% dplyr::distinct()
colnames(grouping_tbl) = c("group",batches)
grouping_tbl = grouping_tbl %>% mutate(sample = group)
grouping_tbl = grouping_tbl %>% tibble::as_tibble()
} else {
grouping_tbl = metadata_combined[,c(sample_id, group_id)] %>% tibble::as_tibble() %>% dplyr::distinct()
colnames(grouping_tbl) = c("sample","group")
grouping_tbl = metadata_combined[,c(group_id)] %>% tibble::as_tibble() %>% dplyr::distinct()
colnames(grouping_tbl) = c("group")
grouping_tbl = grouping_tbl %>% mutate(sample = group) %>% select(sample, group)

}

rm(sce)
### Use the DE analysis for defining the DE genes in the receiver cell type and perform NicheNet ligand activity and ligand-target inference ----------------------------------------------------------------
if(verbose == TRUE){
print("Calculate NicheNet ligand activities and ligand-target links")
}

ligand_activities_targets_DEgenes = suppressMessages(suppressWarnings(get_ligand_activities_targets_DEgenes(
receiver_de = celltype_de,
receivers_oi = receivers_oi,
Expand Down Expand Up @@ -1072,29 +1057,13 @@ multi_nichenet_analysis_sampleAgnostic = function(sce,
))

# Prepare Unsupervised analysis of samples! ------------------------------------------------------------------------------------------------------------

if(return_lr_prod_matrix == TRUE){

ids_oi = prioritization_tables$group_prioritization_tbl %>% dplyr::filter(fraction_expressing_ligand_receptor > 0) %>% dplyr::pull(id) %>% unique()

lr_prod_df = abundance_expression_info$sender_receiver_info$pb_df %>% dplyr::inner_join(grouping_tbl, by = "sample") %>% dplyr::mutate(lr_interaction = paste(ligand, receptor, sep = "_")) %>% dplyr::mutate(id = paste(lr_interaction, sender, receiver, sep = "_")) %>% dplyr::select(sample, id, ligand_receptor_pb_prod) %>% dplyr::filter(id %in% ids_oi) %>% dplyr::distinct() %>% tidyr::spread(id, ligand_receptor_pb_prod)
lr_prod_mat = lr_prod_df %>% dplyr::select(-sample) %>% data.frame() %>% as.matrix()
rownames(lr_prod_mat) = lr_prod_df$sample

col_remove = lr_prod_mat %>% apply(2,function(x)sum(x != 0)) %>% .[. == 0] %>% names()
row_remove = lr_prod_mat %>% apply(1,function(x)sum(x != 0)) %>% .[. == 0] %>% names()

lr_prod_mat = lr_prod_mat %>% .[rownames(.) %>% generics::setdiff(col_remove),colnames(.) %>% generics::setdiff(col_remove)]
} else {
lr_prod_mat = NULL
}
lr_prod_mat = NULL

# Add information on prior knowledge and expression correlation between LR and target expression ------------------------------------------------------------------------------------------------------------
if(verbose == TRUE){
print("Calculate correlation between LR pairs and target genes")
}
lr_target_prior_cor = lr_target_prior_cor_inference(prioritization_tables$group_prioritization_tbl$receiver %>% unique(), abundance_expression_info, celltype_de, grouping_tbl, prioritization_tables, ligand_target_matrix, logFC_threshold = logFC_threshold, p_val_threshold = p_val_threshold, p_val_adj = p_val_adj, top_n_LR = top_n_LR)

lr_target_prior_cor = tibble()
## save output

if(length(condition_specific_celltypes) > 0) {
Expand Down Expand Up @@ -1144,7 +1113,7 @@ multi_nichenet_analysis_sampleAgnostic = function(sce,
prioritization_tables_with_condition_specific_celltype_receiver = prioritization_tables_with_condition_specific_celltype_receiver,
combined_prioritization_tables = combined_prioritization_tables,
grouping_tbl = grouping_tbl,
lr_target_prior_cor = lr_target_prior_cor
lr_target_prior_cor = tibble()
)

multinichenet_output = make_lite_output_condition_specific(multinichenet_output, top_n_LR = top_n_LR)
Expand All @@ -1160,7 +1129,7 @@ multi_nichenet_analysis_sampleAgnostic = function(sce,
ligand_activities_targets_DEgenes = ligand_activities_targets_DEgenes,
prioritization_tables = prioritization_tables,
grouping_tbl = grouping_tbl,
lr_target_prior_cor = lr_target_prior_cor
lr_target_prior_cor = tibble()
)
multinichenet_output = make_lite_output(multinichenet_output, top_n_LR = top_n_LR)
}
Expand Down
2 changes: 1 addition & 1 deletion R/pipeline_wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -1005,4 +1005,4 @@ make_lite_output_condition_specific = function(multinichenet_output, top_n_LR =
}

return(multinichenet_output)
}
}
41 changes: 41 additions & 0 deletions man/get_frac_exprs_sampleAgnostic.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit da03c09

Please sign in to comment.