From cec735cc254ead53e9e9a27991930c835e9785bb Mon Sep 17 00:00:00 2001
From: Xue Xiao <72620652+Heinyxiao@users.noreply.github.com>
Date: Fri, 6 Sep 2024 13:05:38 -0400
Subject: [PATCH] Add files via upload

---
 AUCell.Rmd     |   89 ++++
 BCSCdb.Rmd     |   60 +++
 CellChat2.Rmd  |  174 +++++++
 MAGIC_OC.ipynb | 1295 ++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 1618 insertions(+)
 create mode 100644 AUCell.Rmd
 create mode 100644 BCSCdb.Rmd
 create mode 100644 CellChat2.Rmd
 create mode 100644 MAGIC_OC.ipynb

diff --git a/AUCell.Rmd b/AUCell.Rmd
new file mode 100644
index 0000000..7a1e603
--- /dev/null
+++ b/AUCell.Rmd
@@ -0,0 +1,89 @@
+---
+title: "AUCell"
+author: "Xue Xiao"
+date: "2024-06-20"
+output: html_document
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+```
+
+## Install AUCell
+```{r eval=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
+BiocManager::install("AUCell")
+```
+
+## Load Packages
+```{r}
+library(AUCell)
+library(Matrix)
+library(SummarizedExperiment)
+library(Seurat)
+```
+
+
+## Load Genesets and Datasets
+```{r}
+data_dir <- "/Users/xuexiao/Library/CloudStorage/GoogleDrive-heinyxiao@gmail.com/My Drive/Lab/Projects/Dedifferentiation/Data" # explicit data directory instead of setwd(), which knitr undoes after the chunk
+CC1_markers <- read.csv(file.path(data_dir, "CC1_markers.csv"), header = TRUE, row.names = 1)
+CC1_geneset <- CC1_markers$Genes
+
+CSC_markers_0.007 <- read.csv(file.path(data_dir, "filtered_markers_above_0_007.csv"), header = TRUE, row.names = 1)
+CSC_geneset_0.007 <- CSC_markers_0.007$GENE
+
+CSC_markers_0.006 <- read.csv(file.path(data_dir, "filtered_markers_above_0_006.csv"), header = TRUE, row.names = 1)
+CSC_geneset_0.006 <- CSC_markers_0.006$GENE
+
+CSC_markers_0.005 <- read.csv(file.path(data_dir, "filtered_markers_above_0_005.csv"), header = TRUE, row.names = 1)
+CSC_geneset_0.005 <- CSC_markers_0.005$GENE
+
+gene_sets <- list(
+  CC1 = CC1_geneset,
+  CSC_0.007 = CSC_geneset_0.007,
+  CSC_0.006 = CSC_geneset_0.006,
+  CSC_0.005 = CSC_geneset_0.005
+)
+common_genes <- intersect(CC1_geneset, CSC_geneset_0.005)
+```
+
+
+```{r}
+# Open Seurat File
+seurat_obj <- readRDS("~/Library/CloudStorage/GoogleDrive-heinyxiao@gmail.com/My Drive/Lab/Projects/Dedifferentiation/Data/Cancer_cell_in_house_magic_cytotrace.rds")
+
+# Extract the expression matrix (the "counts" layer of the default assay)
+expr_matrix <- GetAssayData(seurat_obj, layer = "counts")
+
+```
+
+## Run AUCell
+```{r}
+# Build the rankings
+cells_rankings <- AUCell_buildRankings(expr_matrix, nCores = 1, plotStats = TRUE)
+
+# Calculate the AUCell scores
+cells_AUC <- AUCell_calcAUC(gene_sets, cells_rankings)
+
+# Add AUCell scores to Seurat metadata (transpose so cells are rows, one column per gene set)
+aucell_scores <- as.data.frame(t(getAUC(cells_AUC))) # getAUC() is the accessor for the AUC matrix; avoids reaching into S4 slots
+colnames(aucell_scores) <- paste0("AUCell_", colnames(aucell_scores))
+seurat_obj <- AddMetaData(seurat_obj, metadata = aucell_scores)
+
+# View the Seurat object metadata to check if scores were added
+head(seurat_obj@meta.data)
+
+```
+
+## Visualization
+```{r inline_plot, fig.width=7, fig.height=5}
+
+# Define custom color palette (ggplot2 is not attached in this document, so call it by namespace)
+custom_colors <- ggplot2::scale_color_gradientn(colors = c("gray", "yellow", "red"))
+
+# Visualize the AUCell scores with custom color scheme
+FeaturePlot(seurat_obj, features = colnames(aucell_scores), pt.size = 0.2, label = TRUE) & custom_colors
+
+```
+
+
diff --git a/BCSCdb.Rmd b/BCSCdb.Rmd
new file mode 100644
index 0000000..3321e9a
--- /dev/null
+++ b/BCSCdb.Rmd
@@ -0,0 +1,60 @@
+---
+title: "BCSCdb"
+author: "Xue Xiao"
+date: "2024-06-18"
+output: html_document
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+```
+
+## Load packages
+```{r}
+library(ggplot2)
+```
+
+
+## Load all CSC markers
+```{r}
+data_dir <- "/Users/xuexiao/Library/CloudStorage/GoogleDrive-heinyxiao@gmail.com/My Drive/Lab/Projects/Dedifferentiation/Data" # explicit data directory instead of setwd(), which knitr undoes after the chunk
+all_CSC_markers <- read.csv(file.path(data_dir, "CSC_Biomarker_2022_All.csv"), header = FALSE) # no header row: columns are read as V1, V2, ...
+head(all_CSC_markers)
+length(unique(all_CSC_markers$V1))
+```
+
+## Filter top 100 markers
+```{r}
+## Unique markers (keep the first row seen for each value of V1)
+unique_CSC_markers <- all_CSC_markers[!duplicated(all_CSC_markers$V1), ]
+
+## Sort the CSC markers in descending order of global score
+sorted_CSC_markers <- unique_CSC_markers[order(-as.numeric(unique_CSC_markers$V11)), ]
+
+## Check distribution of global score
+ggplot(unique_CSC_markers, aes(x = V11)) +
+  geom_histogram(binwidth = 0.05, fill = "blue", color = "black", alpha = 0.7) +
+  scale_x_continuous(limits = c(-0.1, 1), breaks = seq(-1, 1, by = 0.05)) +
+  labs(title = "Distribution of Global Scores",
+       x = "Global Score",
+       y = "Frequency") +
+  theme_minimal() +
+  stat_bin(binwidth = 0.05, geom = "text", aes(label = after_stat(count)), vjust = -0.5, color = "black")
+## Tabulate the global scores to help choose a cutoff for the top markers
+table(unique_CSC_markers$V11)
+```
+
+
+```{r}
+sum(unique_CSC_markers$V11 > 0.007, na.rm = TRUE) # 105 genes
+sum(unique_CSC_markers$V11 > 0.006, na.rm = TRUE) # 158 genes
+sum(unique_CSC_markers$V11 > 0.005, na.rm = TRUE) # 269 genes
+```
+## Filter genes with global score
+```{r}
+filtered_markers <- unique_CSC_markers[which(unique_CSC_markers$V11 > 0.005), ] # which() drops rows whose score is NA instead of emitting all-NA rows
+unique_gene_names <- as.list(filtered_markers$V1)
+colnames(filtered_markers) <- c("GENE", "MARKER_TYPE", "EXPRESSION_LEVEL", "HGNC_ID", "CANCER_TYPE", "HISTOLOGICAL_TYPE", "CELL_LINE", "CSC_ENRICHMENT", "METHOD", "CONFIDENCE_SCORING", "GLOBAL_SCORING", "PUBMED_ID")
+write.csv(filtered_markers, "/Users/xuexiao/Library/CloudStorage/GoogleDrive-heinyxiao@gmail.com/My Drive/Lab/Projects/Dedifferentiation/Data/filtered_markers_above_0_005.csv")
+```
+
diff --git a/CellChat2.Rmd b/CellChat2.Rmd
new file mode 100644
index 0000000..d581884
--- /dev/null
+++ b/CellChat2.Rmd
@@ -0,0 +1,174 @@
+---
+title: "OC_CAF_Crosstalk"
+output: html_document
+date: "2024-05-21"
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+```
+
+## Package Installation
+```{r, eval=FALSE}
+# install.packages("devtools")
+devtools::install_github("immunogenomics/presto")
+devtools::install_github("jinworks/cellchat")
+```
+
+## Load Packages
+```{r}
+library(Seurat)
+library(CellChat) # package names are case-sensitive: the installed package is "CellChat"
+library(ggplot2)
+library(ggplotify)
+```
+
+
+## Load Data
+```{r}
+load("~/Library/CloudStorage/GoogleDrive-heinyxiao@gmail.com/My Drive/Lab/Projects/OC_CAF_Crosstalk/GSE165897/Secreted_Signaling_GSE165897_Object.RData")
+load("~/Desktop/Lab/Projects/OC_CAF_Crosstalk/Secreted_Signalingin_house_Object.RData")
+load("~/Desktop/Lab/Projects/OC_CAF_Crosstalk/Secreted_Signaling_Object.RData") # NOTE(review): if each .RData file stores an object named `cellchat`, only this last load() survives - confirm which dataset is intended
+cellchat@meta$labels[cellchat@meta$labels == "Epithelial_cells"] <- "OC_cells"
+cellchat@meta$labels[cellchat@meta$labels == "Smooth_muscle_cells"] <- "CAFs"
+table(cellchat@idents)
+cellchat <- setIdent(cellchat, ident.use = "labels")
+```
+
+## Create cellchat Object
+```{r}
+data.input <- in_house_seurat[["RNA"]]$data # normalized data matrix; NOTE(review): not passed to createCellChat() below - confirm it is still needed
+# For Seurat version >= “5.0.0”, get the normalized data via `seurat_object[["RNA"]]$data`
+Idents(in_house_seurat) <- "subcluster"
+labels <- Idents(in_house_seurat)
+colnames(in_house_seurat@meta.data)
+meta <- data.frame(labels = labels, row.names = names(labels)) # create a dataframe of the cell labels
+
+cellchat <- createCellChat(object = in_house_seurat, group.by = "subcluster", assay = "RNA") # constructor is createCellChat (case-sensitive)
+
+```
+## Set the ligand-receptor interaction database
+```{r}
+cellchatDB <- CellChatDB.human # human ligand-receptor database shipped with CellChat (object name is case-sensitive)
+cellchatDB.use <- subsetDB(cellchatDB, search = "Secreted Signaling", key = "annotation")
+cellchat@DB <- cellchatDB.use
+cellchat <- subsetData(cellchat)
+```
+
+## Run cellchat
+```{r}
+ptm <- Sys.time() # start of the timed section
+future::plan("multisession", workers = 8)
+cellchat <- identifyOverExpressedGenes(cellchat)
+cellchat <- identifyOverExpressedInteractions(cellchat)
+execution.time <- Sys.time() - ptm
+print(as.numeric(execution.time, units = "secs"))
+cellchat <- computeCommunProb(cellchat, type = "triMean")
+cellchat <- filterCommunication(cellchat, min.cells = 10)
+cellchat <- computeCommunProbPathway(cellchat)
+```
+
+```{r}
+cellchat <- aggregateNet(cellchat)
+execution.time <- Sys.time() - ptm
+print(as.numeric(execution.time, units = "secs"))
+```
+
+
+## Visualization
+```{r}
+# Aggregated Cell-Cell Communication Network (Total Interactions)
+ptm <- Sys.time()
+groupSize <- as.numeric(table(cellchat@idents))
+par(mfrow = c(1,2), xpd=TRUE)
+netVisual_circle(cellchat@net$count, vertex.weight = groupSize, weight.scale = TRUE, label.edge = FALSE, title.name = "Number of interactions")
+netVisual_circle(cellchat@net$weight, vertex.weight = groupSize, weight.scale = TRUE, label.edge = FALSE, title.name = "Interaction weights/strength")
+
+plot1 <- as.ggplot(~netVisual_circle(cellchat@net$count, vertex.weight = groupSize,
+                                     weight.scale = TRUE, label.edge = FALSE,
+                                     title.name = "Number of interactions"))
+# Save the first plot
+ggsave("Number_of_interactions.pdf", plot = plot1, width = 6, height = 6)
+
+# Convert the second plot to a ggplot object
+plot2 <- as.ggplot(~netVisual_circle(cellchat@net$weight, vertex.weight = groupSize,
+                                     weight.scale = TRUE, label.edge = FALSE,
+                                     title.name = "Interaction weights/strength"))
+# Save the second plot
+ggsave("Interaction_weights_strength.pdf", plot = plot2, width = 6, height = 6)
+getwd()
+setwd("/Users/xuexiao/Library/CloudStorage/GoogleDrive-heinyxiao@gmail.com/My Drive/Lab/Figure/Data") # NOTE(review): runs after both ggsave() calls, so the PDFs above are written to the previous working directory - confirm intent
+```
+
+
+```{r}
+pathways.show <- c("PDGF") # Network Centrality Scores; the pathway is defined here because this chunk runs before the later assignment
+cellchat <- netAnalysis_computeCentrality(cellchat, slot.name = "netP")
+netAnalysis_signalingRole_network(cellchat, signaling = pathways.show, width = 13, height = 5, font.size = 10)
+```
+```{r}
+# Signaling role analysis on the aggregated cell-cell communication network from all signaling pathways
+ht1 <- netAnalysis_signalingRole_heatmap(cellchat, pattern = "outgoing")
+ht2 <- netAnalysis_signalingRole_heatmap(cellchat, pattern = "incoming")
+ht1 + ht2
+ht <- netAnalysis_signalingRole_heatmap(cellchat, signaling = c("PDGF", "ncWNT"), width = 8, height = 5, font.size = 10) # assign so the `ht` printed below exists
+ht
+```
+```{r}
+# show all the significant signaling pathways from some cell groups (defined by 'sources.use') to other cell groups (defined by 'targets.use')
+netVisual_chord_gene(cellchat, sources.use = c(1:3), targets.use = c(4:13), slot.name = "netP", legend.pos.x = 10, small.gap = 0.2, lab.cex = 0.5)
+```
+
+
+```{r}
+pathways.show <- c("PDGF")
+```
+
+### Circle plot
+```{r}
+par(mfrow=c(1,1))
+netVisual_aggregate(cellchat, signaling = pathways.show, layout = "circle")
+```
+### Chord diagram
+```{r}
+par(mfrow=c(1,1))
+netVisual_aggregate(cellchat, signaling = pathways.show, layout = "chord")
+```
+### Heatmap
+```{r}
+par(mfrow=c(1,1))
+netVisual_heatmap(cellchat, signaling = pathways.show, color.heatmap = "Reds")
+```
+### Contribution
+```{r}
+netAnalysis_contribution(cellchat, signaling = pathways.show, font.size = 10, width = 20)
+
+```
+### Single L-R pair
+```{r}
+pairLR.PDGF <- extractEnrichedLR(cellchat, signaling = pathways.show, geneLR.return = FALSE)
+LR.show <- pairLR.PDGF[4,] # show one ligand-receptor pair; NOTE(review): row 4 is hard-coded and assumes at least four enriched pairs
+```
+
+```{r}
+# Chord plot
+netVisual_individual(cellchat, signaling = pathways.show, pairLR.use = LR.show, layout = "chord")
+# Circle plot
+#netVisual_individual(cellchat, signaling = pathways.show, pairLR.use = LR.show, layout = "circle")
+```
+```{r}
+netVisual_bubble(cellchat, sources.use = c(4:13), targets.use = c(1:3), signaling = c("PDGF"), remove.isolate = FALSE, sort.by.target = TRUE)
+netVisual_bubble(cellchat, sources.use = c(1:3), targets.use = c(4:13), signaling = c("PDGF"), remove.isolate = FALSE)
+netVisual_bubble(cellchat, sources.use = c(1:3), targets.use = c(4:13), signaling = c("ncWNT"), remove.isolate = FALSE)
+```
+```{r}
+netVisual_chord_gene(cellchat, sources.use = c(4:13), targets.use = c(1:3), signaling = c("PDGF"),legend.pos.x = 8)
+netVisual_chord_gene(cellchat, sources.use = c(1:3), targets.use = c(4:13), signaling = c("ncWNT"),legend.pos.x = 8, small.gap = 0.1)
+
+```
+
+## Save CellChat object
+```{r}
+saveRDS(cellchat, file = "~/Library/CloudStorage/GoogleDrive-heinyxiao@gmail.com/My Drive/Lab/Projects/OC_CAF_Crosstalk/Imputed_in_house/cellchat_in_house.rds") # the object built above is `cellchat`; `cellChat` was never defined
+```
+
diff --git a/MAGIC_OC.ipynb b/MAGIC_OC.ipynb
new file mode 100644
index 0000000..9152bf6 --- /dev/null +++ b/MAGIC_OC.ipynb @@ -0,0 +1,1295 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cbf4bb1b-046e-4e72-b71b-3d3c2d3605a8", + "metadata": {}, + "source": [ + "## MAGIC Imputation of ovarian cancer cells" + ] + }, + { + "cell_type": "markdown", + "id": "0caa3e47-ce4e-4a2f-a33f-461c42a1975a", + "metadata": {}, + "source": [ + "MAGIC first learns the data's underlying structure and then smooths gene expression values over this structure using a cell-cell affinity graph to construct a Markov diffusion operator. This operator is used to diffuse data between cells, filling in missing values and denoising the data." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "4301bf07-0b11-494a-bfed-bf5f8b62af95", + "metadata": {}, + "outputs": [], + "source": [ + "# Load packages\n", + "import magic\n", + "import scprep\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Matplotlib command for Jupyter notebooks only\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "e9205d2b-6bcc-48c9-9ff7-1d9f54e8d4c8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MIR1302-2HGFAM138AOR4F5AL627309.1AL627309.3AL627309.2AL627309.5AL627309.4AP006222.2AL732372.1...AC133551.1AC136612.1AC136616.1AC136616.3AC136616.2AC141272.1AC023491.2AC007325.1AC007325.4AC007325.2
AS_AAACCCAAGCGTTAGG-10.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
AS_AAACCCACAAACCACT-10.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
AS_AAACCCACAACGTTAC-10.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
AS_AAACCCAGTCCGGTGT-10.00.00.00.00.00.01.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
AS_AAACGAAAGTGGACTG-10.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
AS_AAACGAACAATTCTTC-10.00.00.00.00.00.01.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
AS_AAACGAAGTAGGAGGG-10.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
AS_AAACGAATCTATGCCC-10.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
AS_AAACGCTTCTGATGGT-10.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
AS_AAAGAACGTCGAGTTT-10.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n", + "

10 rows × 36601 columns

\n", + "
" + ], + "text/plain": [ + " MIR1302-2HG FAM138A OR4F5 AL627309.1 AL627309.3 \\\n", + "AS_AAACCCAAGCGTTAGG-1 0.0 0.0 0.0 0.0 0.0 \n", + "AS_AAACCCACAAACCACT-1 0.0 0.0 0.0 0.0 0.0 \n", + "AS_AAACCCACAACGTTAC-1 0.0 0.0 0.0 0.0 0.0 \n", + "AS_AAACCCAGTCCGGTGT-1 0.0 0.0 0.0 0.0 0.0 \n", + "AS_AAACGAAAGTGGACTG-1 0.0 0.0 0.0 0.0 0.0 \n", + "AS_AAACGAACAATTCTTC-1 0.0 0.0 0.0 0.0 0.0 \n", + "AS_AAACGAAGTAGGAGGG-1 0.0 0.0 0.0 0.0 0.0 \n", + "AS_AAACGAATCTATGCCC-1 0.0 0.0 0.0 0.0 0.0 \n", + "AS_AAACGCTTCTGATGGT-1 0.0 0.0 0.0 0.0 0.0 \n", + "AS_AAAGAACGTCGAGTTT-1 0.0 0.0 0.0 0.0 0.0 \n", + "\n", + " AL627309.2 AL627309.5 AL627309.4 AP006222.2 \\\n", + "AS_AAACCCAAGCGTTAGG-1 0.0 0.0 0.0 0.0 \n", + "AS_AAACCCACAAACCACT-1 0.0 0.0 0.0 0.0 \n", + "AS_AAACCCACAACGTTAC-1 0.0 0.0 0.0 0.0 \n", + "AS_AAACCCAGTCCGGTGT-1 0.0 1.0 0.0 0.0 \n", + "AS_AAACGAAAGTGGACTG-1 0.0 0.0 0.0 0.0 \n", + "AS_AAACGAACAATTCTTC-1 0.0 1.0 0.0 0.0 \n", + "AS_AAACGAAGTAGGAGGG-1 0.0 0.0 0.0 0.0 \n", + "AS_AAACGAATCTATGCCC-1 0.0 0.0 0.0 0.0 \n", + "AS_AAACGCTTCTGATGGT-1 0.0 0.0 0.0 0.0 \n", + "AS_AAAGAACGTCGAGTTT-1 0.0 0.0 0.0 0.0 \n", + "\n", + " AL732372.1 ... AC133551.1 AC136612.1 AC136616.1 \\\n", + "AS_AAACCCAAGCGTTAGG-1 0.0 ... 0.0 0.0 0.0 \n", + "AS_AAACCCACAAACCACT-1 0.0 ... 0.0 0.0 0.0 \n", + "AS_AAACCCACAACGTTAC-1 0.0 ... 0.0 0.0 0.0 \n", + "AS_AAACCCAGTCCGGTGT-1 0.0 ... 0.0 0.0 0.0 \n", + "AS_AAACGAAAGTGGACTG-1 0.0 ... 0.0 0.0 0.0 \n", + "AS_AAACGAACAATTCTTC-1 0.0 ... 0.0 0.0 0.0 \n", + "AS_AAACGAAGTAGGAGGG-1 0.0 ... 0.0 0.0 0.0 \n", + "AS_AAACGAATCTATGCCC-1 0.0 ... 0.0 0.0 0.0 \n", + "AS_AAACGCTTCTGATGGT-1 0.0 ... 0.0 0.0 0.0 \n", + "AS_AAAGAACGTCGAGTTT-1 0.0 ... 
0.0 0.0 0.0 \n", + "\n", + " AC136616.3 AC136616.2 AC141272.1 AC023491.2 \\\n", + "AS_AAACCCAAGCGTTAGG-1 0.0 0.0 0.0 0.0 \n", + "AS_AAACCCACAAACCACT-1 0.0 0.0 0.0 0.0 \n", + "AS_AAACCCACAACGTTAC-1 0.0 0.0 0.0 0.0 \n", + "AS_AAACCCAGTCCGGTGT-1 0.0 0.0 0.0 0.0 \n", + "AS_AAACGAAAGTGGACTG-1 0.0 0.0 0.0 0.0 \n", + "AS_AAACGAACAATTCTTC-1 0.0 0.0 0.0 0.0 \n", + "AS_AAACGAAGTAGGAGGG-1 0.0 0.0 0.0 0.0 \n", + "AS_AAACGAATCTATGCCC-1 0.0 0.0 0.0 0.0 \n", + "AS_AAACGCTTCTGATGGT-1 0.0 0.0 0.0 0.0 \n", + "AS_AAAGAACGTCGAGTTT-1 0.0 0.0 0.0 0.0 \n", + "\n", + " AC007325.1 AC007325.4 AC007325.2 \n", + "AS_AAACCCAAGCGTTAGG-1 0.0 0.0 0.0 \n", + "AS_AAACCCACAAACCACT-1 0.0 0.0 0.0 \n", + "AS_AAACCCACAACGTTAC-1 0.0 0.0 0.0 \n", + "AS_AAACCCAGTCCGGTGT-1 0.0 0.0 0.0 \n", + "AS_AAACGAAAGTGGACTG-1 0.0 0.0 0.0 \n", + "AS_AAACGAACAATTCTTC-1 0.0 0.0 0.0 \n", + "AS_AAACGAAGTAGGAGGG-1 0.0 0.0 0.0 \n", + "AS_AAACGAATCTATGCCC-1 0.0 0.0 0.0 \n", + "AS_AAACGCTTCTGATGGT-1 0.0 0.0 0.0 \n", + "AS_AAAGAACGTCGAGTTT-1 0.0 0.0 0.0 \n", + "\n", + "[10 rows x 36601 columns]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load data - expression matrix\n", + "sc_data = scprep.io.load_csv(\"/Users/xuexiao/Desktop/Lab/Projects/Dedifferentiation/Data/in_house_oc_data.csv\", cell_axis= 'column')\n", + "\n", + "# Check data (first 10 rows)\n", + "sc_data.head(10)" + ] + }, + { + "cell_type": "markdown", + "id": "ea65f9ae-3844-4968-95fa-8713550ba91b", + "metadata": {}, + "source": [ + "After loading your data, you're going to want to determine the molecule per cell and molecule per gene cutoffs with which to filter the data, in order to remove lowly expressed genes and cells with a small library size." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "4f1f2d21-22b0-4d02-a7f8-3adf4ed192a2", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/xuexiao/.local/lib/python3.10/site-packages/scprep/plot/utils.py:104: UserWarning: FigureCanvasAgg is non-interactive, and thus cannot be shown\n", + " fig.show()\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AL627309.1AL627309.5LINC01409LINC01128LINC00115FAM41CLINC02593SAMD11NOC2LKLHL17...MT-ND6MT-CYBBX004987.1AC145212.1MAFIPAC011043.1AL354822.1AL592183.1AC240274.1AC007325.4
AS_AAACCCAAGCGTTAGG-10.00.00.00.00.00.00.00.00.00.0...0.08.00.00.00.00.00.00.00.00.0
AS_AAACCCACAAACCACT-10.00.01.00.00.00.00.00.01.00.0...4.0137.00.00.00.00.00.00.00.00.0
AS_AAACCCACAACGTTAC-10.00.01.00.00.01.00.00.00.00.0...7.051.01.00.00.00.00.00.00.00.0
AS_AAACCCAGTCCGGTGT-10.01.00.00.00.00.00.00.02.00.0...5.0108.00.00.00.00.00.00.00.00.0
AS_AAACGAAAGTGGACTG-10.00.00.00.00.00.00.00.03.00.0...1.0136.00.00.00.00.00.01.00.00.0
AS_AAACGAACAATTCTTC-10.01.01.01.00.01.00.00.02.00.0...9.0417.00.01.01.00.00.04.00.00.0
AS_AAACGAAGTAGGAGGG-10.00.00.00.00.00.00.01.00.00.0...0.050.00.00.00.00.00.00.00.00.0
AS_AAACGAATCTATGCCC-10.00.00.00.00.00.00.00.00.00.0...5.0163.00.00.00.00.00.00.01.00.0
AS_AAACGCTTCTGATGGT-10.00.00.00.00.00.00.00.01.00.0...1.063.00.00.00.00.00.01.00.00.0
AS_AAAGAACGTCGAGTTT-10.00.00.00.00.00.00.00.01.00.0...0.03.00.00.00.00.00.00.00.00.0
\n", + "

10 rows × 20555 columns

\n", + "
" + ], + "text/plain": [ + " AL627309.1 AL627309.5 LINC01409 LINC01128 \\\n", + "AS_AAACCCAAGCGTTAGG-1 0.0 0.0 0.0 0.0 \n", + "AS_AAACCCACAAACCACT-1 0.0 0.0 1.0 0.0 \n", + "AS_AAACCCACAACGTTAC-1 0.0 0.0 1.0 0.0 \n", + "AS_AAACCCAGTCCGGTGT-1 0.0 1.0 0.0 0.0 \n", + "AS_AAACGAAAGTGGACTG-1 0.0 0.0 0.0 0.0 \n", + "AS_AAACGAACAATTCTTC-1 0.0 1.0 1.0 1.0 \n", + "AS_AAACGAAGTAGGAGGG-1 0.0 0.0 0.0 0.0 \n", + "AS_AAACGAATCTATGCCC-1 0.0 0.0 0.0 0.0 \n", + "AS_AAACGCTTCTGATGGT-1 0.0 0.0 0.0 0.0 \n", + "AS_AAAGAACGTCGAGTTT-1 0.0 0.0 0.0 0.0 \n", + "\n", + " LINC00115 FAM41C LINC02593 SAMD11 NOC2L KLHL17 \\\n", + "AS_AAACCCAAGCGTTAGG-1 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "AS_AAACCCACAAACCACT-1 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "AS_AAACCCACAACGTTAC-1 0.0 1.0 0.0 0.0 0.0 0.0 \n", + "AS_AAACCCAGTCCGGTGT-1 0.0 0.0 0.0 0.0 2.0 0.0 \n", + "AS_AAACGAAAGTGGACTG-1 0.0 0.0 0.0 0.0 3.0 0.0 \n", + "AS_AAACGAACAATTCTTC-1 0.0 1.0 0.0 0.0 2.0 0.0 \n", + "AS_AAACGAAGTAGGAGGG-1 0.0 0.0 0.0 1.0 0.0 0.0 \n", + "AS_AAACGAATCTATGCCC-1 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "AS_AAACGCTTCTGATGGT-1 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "AS_AAAGAACGTCGAGTTT-1 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "\n", + " ... MT-ND6 MT-CYB BX004987.1 AC145212.1 MAFIP \\\n", + "AS_AAACCCAAGCGTTAGG-1 ... 0.0 8.0 0.0 0.0 0.0 \n", + "AS_AAACCCACAAACCACT-1 ... 4.0 137.0 0.0 0.0 0.0 \n", + "AS_AAACCCACAACGTTAC-1 ... 7.0 51.0 1.0 0.0 0.0 \n", + "AS_AAACCCAGTCCGGTGT-1 ... 5.0 108.0 0.0 0.0 0.0 \n", + "AS_AAACGAAAGTGGACTG-1 ... 1.0 136.0 0.0 0.0 0.0 \n", + "AS_AAACGAACAATTCTTC-1 ... 9.0 417.0 0.0 1.0 1.0 \n", + "AS_AAACGAAGTAGGAGGG-1 ... 0.0 50.0 0.0 0.0 0.0 \n", + "AS_AAACGAATCTATGCCC-1 ... 5.0 163.0 0.0 0.0 0.0 \n", + "AS_AAACGCTTCTGATGGT-1 ... 1.0 63.0 0.0 0.0 0.0 \n", + "AS_AAAGAACGTCGAGTTT-1 ... 
0.0 3.0 0.0 0.0 0.0 \n", + "\n", + " AC011043.1 AL354822.1 AL592183.1 AC240274.1 \\\n", + "AS_AAACCCAAGCGTTAGG-1 0.0 0.0 0.0 0.0 \n", + "AS_AAACCCACAAACCACT-1 0.0 0.0 0.0 0.0 \n", + "AS_AAACCCACAACGTTAC-1 0.0 0.0 0.0 0.0 \n", + "AS_AAACCCAGTCCGGTGT-1 0.0 0.0 0.0 0.0 \n", + "AS_AAACGAAAGTGGACTG-1 0.0 0.0 1.0 0.0 \n", + "AS_AAACGAACAATTCTTC-1 0.0 0.0 4.0 0.0 \n", + "AS_AAACGAAGTAGGAGGG-1 0.0 0.0 0.0 0.0 \n", + "AS_AAACGAATCTATGCCC-1 0.0 0.0 0.0 1.0 \n", + "AS_AAACGCTTCTGATGGT-1 0.0 0.0 1.0 0.0 \n", + "AS_AAAGAACGTCGAGTTT-1 0.0 0.0 0.0 0.0 \n", + "\n", + " AC007325.4 \n", + "AS_AAACCCAAGCGTTAGG-1 0.0 \n", + "AS_AAACCCACAAACCACT-1 0.0 \n", + "AS_AAACCCACAACGTTAC-1 0.0 \n", + "AS_AAACCCAGTCCGGTGT-1 0.0 \n", + "AS_AAACGAAAGTGGACTG-1 0.0 \n", + "AS_AAACGAACAATTCTTC-1 0.0 \n", + "AS_AAACGAAGTAGGAGGG-1 0.0 \n", + "AS_AAACGAATCTATGCCC-1 0.0 \n", + "AS_AAACGCTTCTGATGGT-1 0.0 \n", + "AS_AAAGAACGTCGAGTTT-1 0.0 \n", + "\n", + "[10 rows x 20555 columns]" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Filter data\n", + "scprep.plot.plot_library_size(sc_data, cutoff = 1500)\n", + "sc_data = scprep.filter.filter_library_size(sc_data, cutoff=1500)\n", + "sc_data = scprep.filter.filter_rare_genes(sc_data, min_cells=10)\n", + "sc_data.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "ae5e56de-3ceb-41e4-acde-a570ac541794", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AL627309.1AL627309.5LINC01409LINC01128LINC00115FAM41CLINC02593SAMD11NOC2LKLHL17...MT-ND6MT-CYBBX004987.1AC145212.1MAFIPAC011043.1AL354822.1AL592183.1AC240274.1AC007325.4
AS_AAACCCAAGCGTTAGG-10.00.0000000.0000000.0000000.00.0000000.00.0000000.0000000.0...0.0000003.6898110.0000000.0000000.0000000.00.00.0000000.0000000.0
AS_AAACCCACAAACCACT-10.00.0000000.5281620.0000000.00.0000000.00.0000000.5281620.0...1.0563256.1819830.0000000.0000000.0000000.00.00.0000000.0000000.0
AS_AAACCCACAACGTTAC-10.00.0000000.5810620.0000000.00.5810620.00.0000000.0000000.0...1.5373444.1496090.5810620.0000000.0000000.00.00.0000000.0000000.0
AS_AAACCCAGTCCGGTGT-10.00.4929280.0000000.0000000.00.0000000.00.0000000.6971050.0...1.1022205.1226570.0000000.0000000.0000000.00.00.0000000.0000000.0
AS_AAACGAAAGTGGACTG-10.00.0000000.0000000.0000000.00.0000000.00.0000000.8465760.0...0.4887715.7000020.0000000.0000000.0000000.00.00.4887710.0000000.0
AS_AAACGAACAATTCTTC-10.00.3080410.3080410.3080410.00.3080410.00.0000000.4356360.0...0.9241236.2903750.0000000.3080410.3080410.00.00.6160820.0000000.0
AS_AAACGAAGTAGGAGGG-10.00.0000000.0000000.0000000.00.0000000.00.7205070.0000000.0...0.0000005.0947520.0000000.0000000.0000000.00.00.0000000.0000000.0
AS_AAACGAATCTATGCCC-10.00.0000000.0000000.0000000.00.0000000.00.0000000.0000000.0...1.0134055.7861780.0000000.0000000.0000000.00.00.0000000.4532080.0
AS_AAACGCTTCTGATGGT-10.00.0000000.0000000.0000000.00.0000000.00.0000000.5972840.0...0.5972844.7407930.0000000.0000000.0000000.00.00.5972840.0000000.0
AS_AAAGAACGTCGAGTTT-10.00.0000000.0000000.0000000.00.0000000.00.0000000.8511330.0...0.0000001.4742060.0000000.0000000.0000000.00.00.0000000.0000000.0
\n", + "

10 rows × 20555 columns

\n", + "
" + ], + "text/plain": [ + " AL627309.1 AL627309.5 LINC01409 LINC01128 \\\n", + "AS_AAACCCAAGCGTTAGG-1 0.0 0.000000 0.000000 0.000000 \n", + "AS_AAACCCACAAACCACT-1 0.0 0.000000 0.528162 0.000000 \n", + "AS_AAACCCACAACGTTAC-1 0.0 0.000000 0.581062 0.000000 \n", + "AS_AAACCCAGTCCGGTGT-1 0.0 0.492928 0.000000 0.000000 \n", + "AS_AAACGAAAGTGGACTG-1 0.0 0.000000 0.000000 0.000000 \n", + "AS_AAACGAACAATTCTTC-1 0.0 0.308041 0.308041 0.308041 \n", + "AS_AAACGAAGTAGGAGGG-1 0.0 0.000000 0.000000 0.000000 \n", + "AS_AAACGAATCTATGCCC-1 0.0 0.000000 0.000000 0.000000 \n", + "AS_AAACGCTTCTGATGGT-1 0.0 0.000000 0.000000 0.000000 \n", + "AS_AAAGAACGTCGAGTTT-1 0.0 0.000000 0.000000 0.000000 \n", + "\n", + " LINC00115 FAM41C LINC02593 SAMD11 NOC2L \\\n", + "AS_AAACCCAAGCGTTAGG-1 0.0 0.000000 0.0 0.000000 0.000000 \n", + "AS_AAACCCACAAACCACT-1 0.0 0.000000 0.0 0.000000 0.528162 \n", + "AS_AAACCCACAACGTTAC-1 0.0 0.581062 0.0 0.000000 0.000000 \n", + "AS_AAACCCAGTCCGGTGT-1 0.0 0.000000 0.0 0.000000 0.697105 \n", + "AS_AAACGAAAGTGGACTG-1 0.0 0.000000 0.0 0.000000 0.846576 \n", + "AS_AAACGAACAATTCTTC-1 0.0 0.308041 0.0 0.000000 0.435636 \n", + "AS_AAACGAAGTAGGAGGG-1 0.0 0.000000 0.0 0.720507 0.000000 \n", + "AS_AAACGAATCTATGCCC-1 0.0 0.000000 0.0 0.000000 0.000000 \n", + "AS_AAACGCTTCTGATGGT-1 0.0 0.000000 0.0 0.000000 0.597284 \n", + "AS_AAAGAACGTCGAGTTT-1 0.0 0.000000 0.0 0.000000 0.851133 \n", + "\n", + " KLHL17 ... MT-ND6 MT-CYB BX004987.1 \\\n", + "AS_AAACCCAAGCGTTAGG-1 0.0 ... 0.000000 3.689811 0.000000 \n", + "AS_AAACCCACAAACCACT-1 0.0 ... 1.056325 6.181983 0.000000 \n", + "AS_AAACCCACAACGTTAC-1 0.0 ... 1.537344 4.149609 0.581062 \n", + "AS_AAACCCAGTCCGGTGT-1 0.0 ... 1.102220 5.122657 0.000000 \n", + "AS_AAACGAAAGTGGACTG-1 0.0 ... 0.488771 5.700002 0.000000 \n", + "AS_AAACGAACAATTCTTC-1 0.0 ... 0.924123 6.290375 0.000000 \n", + "AS_AAACGAAGTAGGAGGG-1 0.0 ... 0.000000 5.094752 0.000000 \n", + "AS_AAACGAATCTATGCCC-1 0.0 ... 
1.013405 5.786178 0.000000 \n", + "AS_AAACGCTTCTGATGGT-1 0.0 ... 0.597284 4.740793 0.000000 \n", + "AS_AAAGAACGTCGAGTTT-1 0.0 ... 0.000000 1.474206 0.000000 \n", + "\n", + " AC145212.1 MAFIP AC011043.1 AL354822.1 \\\n", + "AS_AAACCCAAGCGTTAGG-1 0.000000 0.000000 0.0 0.0 \n", + "AS_AAACCCACAAACCACT-1 0.000000 0.000000 0.0 0.0 \n", + "AS_AAACCCACAACGTTAC-1 0.000000 0.000000 0.0 0.0 \n", + "AS_AAACCCAGTCCGGTGT-1 0.000000 0.000000 0.0 0.0 \n", + "AS_AAACGAAAGTGGACTG-1 0.000000 0.000000 0.0 0.0 \n", + "AS_AAACGAACAATTCTTC-1 0.308041 0.308041 0.0 0.0 \n", + "AS_AAACGAAGTAGGAGGG-1 0.000000 0.000000 0.0 0.0 \n", + "AS_AAACGAATCTATGCCC-1 0.000000 0.000000 0.0 0.0 \n", + "AS_AAACGCTTCTGATGGT-1 0.000000 0.000000 0.0 0.0 \n", + "AS_AAAGAACGTCGAGTTT-1 0.000000 0.000000 0.0 0.0 \n", + "\n", + " AL592183.1 AC240274.1 AC007325.4 \n", + "AS_AAACCCAAGCGTTAGG-1 0.000000 0.000000 0.0 \n", + "AS_AAACCCACAAACCACT-1 0.000000 0.000000 0.0 \n", + "AS_AAACCCACAACGTTAC-1 0.000000 0.000000 0.0 \n", + "AS_AAACCCAGTCCGGTGT-1 0.000000 0.000000 0.0 \n", + "AS_AAACGAAAGTGGACTG-1 0.488771 0.000000 0.0 \n", + "AS_AAACGAACAATTCTTC-1 0.616082 0.000000 0.0 \n", + "AS_AAACGAAGTAGGAGGG-1 0.000000 0.000000 0.0 \n", + "AS_AAACGAATCTATGCCC-1 0.000000 0.453208 0.0 \n", + "AS_AAACGCTTCTGATGGT-1 0.597284 0.000000 0.0 \n", + "AS_AAAGAACGTCGAGTTT-1 0.000000 0.000000 0.0 \n", + "\n", + "[10 rows x 20555 columns]" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Normalization\n", + "sc_data = scprep.normalize.library_size_normalize(sc_data)\n", + "sc_data = scprep.transform.sqrt(sc_data)\n", + "sc_data.head(10)\n" + ] + }, + { + "cell_type": "markdown", + "id": "2aad7866-f478-4689-ac44-287d6adfa2c0", + "metadata": {}, + "source": [ + "Default setting: MAGIC creates an operator with the following default values: knn=5, knn_max = 3 * knn, decay=1, t=3." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "3f664a63-2962-45ab-bfbd-3e7b4628d14f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Calculating MAGIC...\n", + " Running MAGIC on 3576 cells and 20555 genes.\n", + " Calculating graph and diffusion operator...\n", + " Calculating PCA...\n", + " Calculated PCA in 12.43 seconds.\n", + " Calculating KNN search...\n", + " Calculated KNN search in 0.71 seconds.\n", + " Calculating affinities...\n", + " Calculated affinities in 0.63 seconds.\n", + " Calculated graph and diffusion operator in 13.83 seconds.\n", + " Running MAGIC with `solver='exact'` on 20555-dimensional data may take a long time. Consider denoising specific genes with `genes=` or using `solver='approximate'`.\n", + " Calculating imputation...\n", + " Calculated imputation in 3.31 seconds.\n", + "Calculated MAGIC in 17.26 seconds.\n" + ] + } + ], + "source": [ + "# Run MAGIC\n", + "magic_operator = magic.MAGIC()\n", + "sc_magic = magic_operator.fit_transform(sc_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "00b82a1e-694c-40ec-94e6-90ec969d5dc4", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, (ax1, ax2) = plt.subplots(1,2, figsize=(16, 6))\n", + "\n", + "scprep.plot.scatter(x=sc_data['ROR2'], y=sc_data['ALDH1A1'], c=sc_data['CREB1'], ax=ax1,\n", + " xlabel='ROR2', ylabel='ALDH1A1', legend_title=\"CREB1\", title='Before MAGIC')\n", + "\n", + "scprep.plot.scatter(x=sc_magic['ROR2'], y=sc_magic['ALDH1A1'], c=sc_magic['CREB1'], ax=ax2,\n", + " xlabel='ROR2', ylabel='ALDH1A1', legend_title=\"CREB1\", title='After MAGIC')\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "268a2264-3ce4-49cc-94cc-d5972314271d", + "metadata": {}, + "outputs": [], + "source": [ + "# Save imputed data\n", + "sc_magic.to_csv(\"/Users/xuexiao/Desktop/Lab/Projects/Dedifferentiation/Data/in_house_oc_magic.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ff979669-1a60-41ce-b9e9-1a0a66f38c41", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}