Commit 7dcd8057 authored by Monah Abou Alezz's avatar Monah Abou Alezz
Browse files

revisions_update_v1

parent 839b34cc
...@@ -73,6 +73,7 @@ DGE.results.sig <- subset(DGE.results.sorted, ...@@ -73,6 +73,7 @@ DGE.results.sig <- subset(DGE.results.sorted,
FDR < adj.pval.thr & abs(logFC) > logfc.thr) FDR < adj.pval.thr & abs(logFC) > logfc.thr)
DGEgenes <- rownames(DGE.results.sig) DGEgenes <- rownames(DGE.results.sig)
# ED_Fig5A ----------------------------------------------------------------- # ED_Fig5A -----------------------------------------------------------------
## scorecard ## scorecard
......
This diff is collapsed.
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Import libraries"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import celloracle as co\n",
"from celloracle.applications import Pseudotime_calculator\n",
"from celloracle.applications import Gradient_calculator\n",
"from celloracle.applications import Oracle_development_module\n",
"import anndata2ri\n",
"import os\n",
"import sys\n",
"import time\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import scanpy as sc\n",
"import seaborn as sns\n",
"import copy\n",
"import glob\n",
"import time\n",
"import shutil\n",
"from tqdm.auto import tqdm"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plotting parameters settings"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# visualization settings\n",
"%config InlineBackend.figure_format = 'retina'\n",
"%matplotlib inline\n",
"plt.rcParams['figure.figsize'] = [6, 4.5]\n",
"plt.rcParams[\"savefig.dpi\"] = 300"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"save_folder = \"Scarfo_HEC2023/scRNAseq\"\n",
"os.makedirs(save_folder, exist_ok=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Seurat object in Python"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Activate the anndata2ri conversion between SingleCellExperiment and AnnData\n",
"anndata2ri.activate()\n",
"#Loading the rpy2 extension\n",
"%load_ext rpy2.ipython\n",
"sc.settings.verbosity = 3\n",
"sc.logging.print_versions()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%R\n",
"suppressPackageStartupMessages(library(Seurat))\n",
"runx1_obj <- readRDS(\"Scarfo_HEC2023/scRNAseq/RUNX1_clusters.rds\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%R -o runx1_obj_sce\n",
"#convert the Seurat object to a SingleCellExperiment object\n",
"runx1_obj_sce <- as.SingleCellExperiment(runx1_obj)\n",
"runx1_obj_sce"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"adata = runx1_obj_sce\n",
"adata"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(f\"Cell number is :{adata.shape[0]}\")\n",
"print(f\"Gene number is :{adata.shape[1]}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sc.pl.umap(adata, color='RNA_snn_new_res.0.6')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Constructing GRN"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Select top 3000 highly-variable genes\n",
"filter_result = sc.pp.filter_genes_dispersion(adata.X,flavor='cell_ranger',n_top_genes=3000,log=False)\n",
"# Subset the genes\n",
"adata = adata[:, filter_result.gene_subset]\n",
"print(f\"Cell number is :{adata.shape[0]}\")\n",
"print(f\"Gene number is :{adata.shape[1]}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load data from CellTalkDB\n",
"ligand_receptor = pd.read_csv(\"Scarfo_HEC2023/scRNAseq/CellTalkDB_modified.txt\", sep='\\t')\n",
"ligand_receptor.columns = ['Ligand','Receptor']\n",
"ligand_receptor\n",
"print('FCGR2B' in ligand_receptor['Ligand'].unique())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Make dictionary: dictionary key is Ligand and dictionary value is list of target genes.\n",
"LR_to_TG_dictionary = {}\n",
"for LR, TGs in zip(ligand_receptor.Ligand, ligand_receptor.Receptor):\n",
" TG_list = TGs.replace(\" \", \"\").split(\",\")\n",
" LR_to_TG_dictionary[LR] = TG_list\n",
"TG_to_LR_dictionary = co.utility.inverse_dictionary(LR_to_TG_dictionary)\n",
"TG_to_LR_dictionary"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Instantiate Oracle object\n",
"oracle = co.Oracle()\n",
"oracle.import_anndata_as_raw_count(adata=adata,cluster_column_name=\"RNA_snn_new_res.0.6\",embedding_name=\"X_umap\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Add LR information \n",
"oracle.addTFinfo_dictionary(TG_to_LR_dictionary)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Perform PCA\n",
"oracle.perform_PCA()\n",
"# Select important PCs\n",
"plt.plot(np.cumsum(oracle.pca.explained_variance_ratio_)[:100])\n",
"n_comps = np.where(np.diff(np.diff(np.cumsum(oracle.pca.explained_variance_ratio_))>0.002))[0][0]\n",
"plt.axvline(n_comps, c=\"k\")\n",
"plt.show()\n",
"print(n_comps)\n",
"n_comps = min(n_comps, 50)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Estimate the optimal number of nearest neighbors for KNN imputation\n",
"n_cell = oracle.adata.shape[0]\n",
"print(f\"cell number is :{n_cell}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"k = int(0.025*n_cell)\n",
"print(f\"Auto-selected k is :{k}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# perform KNN imputation\n",
"oracle.knn_imputation(n_pca_dims=n_comps,k=k, balanced=True,b_sight=k*8,b_maxl=k*4,n_jobs=4)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# GRN calculation\n",
"%time\n",
"links = oracle.get_links(cluster_name_for_GRN_unit=\"RNA_snn_new_res.0.6\", alpha=10,verbose_level=10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"links.links_dict.keys()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# filter weak edges\n",
"links.filter_links(p=0.001, weight=\"coef_abs\", threshold_number=2000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# network degree distribution\n",
"links.plot_degree_distributions(plot_model=True,\n",
" save=f\"{save_folder}/degree_distribution/\",)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Calculate network scores.\n",
"links.get_network_score()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Adding pseudotime to oracle object"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Instantiate pseudotime object using oracle object.\n",
"pt = Pseudotime_calculator(oracle_object=oracle)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"clusters_in_RUNX1_lineage = ['0','1','2','11','16','17']\n",
"# Make a dictionary\n",
"lineage_dictionary = {\"Lineage_RUNX1\": clusters_in_RUNX1_lineage}\n",
"# Input lineage information into pseudotime object\n",
"pt.set_lineage(lineage_dictionary=lineage_dictionary)\n",
"# Visualize lineage information\n",
"pt.plot_lineages()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# add root cell information\n",
"# plotly is required\n",
"try:\n",
" import plotly.express as px\n",
" def plot(adata, embedding_key, cluster_column_name):\n",
" embedding = adata.obsm[embedding_key]\n",
" df = pd.DataFrame(embedding, columns=[\"x\", \"y\"])\n",
" df[\"cluster\"] = adata.obs[cluster_column_name].values\n",
" df[\"label\"] = adata.obs.index.values\n",
" fig = px.scatter(df, x=\"x\", y=\"y\", hover_name=df[\"label\"], color=\"cluster\")\n",
" fig.show()\n",
" plot(adata=pt.adata,\n",
" embedding_key=pt.obsm_key,\n",
" cluster_column_name=pt.cluster_column_name)\n",
"except:\n",
" print(\"Plotly not found in your environment. Did you install plotly?\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Estimated root cell name for each lineage\n",
"root_cells = {\"Lineage_RUNX1\": \"Sample1_GGAGAACTCGCGGTAC-1\"}\n",
"pt.set_root_cells(root_cells=root_cells)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check diffusion map data.\n",
"\"X_diffmap\" in pt.adata.obsm"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# calculate diffusion map\n",
"sc.pp.neighbors(pt.adata, n_neighbors=30)\n",
"sc.tl.diffmap(pt.adata)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Calculate pseudotime\n",
"pt.get_pseudotime_per_each_lineage()\n",
"# Check results\n",
"pt.plot_pseudotime(cmap=\"rainbow\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Add calculated pseudotime data to the oracle object\n",
"oracle.adata.obs = pt.adata.obs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## In silico perturbation analysis"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# make pridictive models for simulation and fit the ridge regression models again\n",
"links.filter_links()\n",
"oracle.get_cluster_specific_TFdict_from_Links(links_object=links)\n",
"oracle.fit_GRN_for_simulation(alpha=10, use_cluster_specific_TFdict=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"goi = \"FCGR2B\"\n",
"# Enter perturbation conditions to simulate signal propagation after the perturbation.\n",
"oracle.simulate_shift(perturb_condition={goi: 0.0}, n_propagation=3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Get transition probability\n",
"oracle.estimate_transition_prob(n_neighbors=200,knn_random=True,sampled_fraction=1)\n",
"# Calculate embedding\n",
"oracle.calculate_embedding_shift(sigma_corr=0.05)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# quiver plots\n",
"fig, ax = plt.subplots(1, 2, figsize=[13, 6])\n",
"scale = 25\n",
"# Show quiver plot\n",
"oracle.plot_quiver(scale=scale, ax=ax[0])\n",
"ax[0].set_title(f\"Simulated cell identity shift vector: {goi} KO\")\n",
"# Show quiver plot that was calculated with randomized graph.\n",
"oracle.plot_quiver_random(scale=scale, ax=ax[1])\n",
"ax[1].set_title(f\"Randomized simulation vector\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# establishing digitalized grids\n",
"n_grid = 40\n",
"oracle.calculate_p_mass(smooth=0.8, n_grid=n_grid, n_neighbors=200)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Search for best min_mass.\n",
"oracle.suggest_mass_thresholds(n_suggestion=12)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"min_mass = 19\n",
"oracle.calculate_mass_filter(min_mass=min_mass, plot=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# shift vector plots\n",
"fig, ax = plt.subplots(1, 2, figsize=[13, 5])\n",
"scale_simulation = 2\n",
"# Show quiver plot\n",
"oracle.plot_simulation_flow_on_grid(scale=scale_simulation, ax=ax[0])\n",
"ax[0].set_title(f\"Simulated cell identity shift vector: {goi} KO\")\n",
"# Show quiver plot that was calculated with randomized graph.\n",
"oracle.plot_simulation_flow_random_on_grid(scale=scale_simulation, ax=ax[1])\n",
"ax[1].set_title(f\"Randomized simulation vector\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Plot vector field with cell cluster\n",
"fig, ax = plt.subplots(figsize=[8, 8])\n",
"oracle.plot_cluster_whole(ax=ax, s=10)\n",
"oracle.plot_simulation_flow_on_grid(scale=scale_simulation, ax=ax, show_background=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Visualize pseudotime\n",
"fig, ax = plt.subplots(figsize=[6,6])\n",
"sc.pl.embedding(adata=oracle.adata, basis=oracle.embedding_name, ax=ax, cmap=\"viridis\",\n",
" color=[\"Pseudotime\"], save=\"pseudotime.pdf\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Instantiate Gradient calculator object\n",
"gradient = Gradient_calculator(oracle_object=oracle, pseudotime_key=\"Pseudotime\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"gradient.calculate_p_mass(smooth=0.8, n_grid=n_grid, n_neighbors=200)\n",
"gradient.calculate_mass_filter(min_mass=min_mass, plot=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# convert pseudotime intop grid points\n",
"gradient.transfer_data_into_grid(args={\"method\": \"polynomial\", \"n_poly\":5}, plot=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Calculate graddient\n",
"gradient.calculate_gradient()\n",
"# Show results\n",
"scale_dev = 20\n",
"gradient.visualize_results(scale=scale_dev, s=5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Visualize results\n",
"fig, ax = plt.subplots(figsize=[6, 6])\n",
"gradient.plot_dev_flow_on_grid(scale=scale_dev, ax=ax)\n",
"ax.set_title(f\"Normal Developmental flow\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# quantitatively compare the directionality and size of vectors between perturbation simulation and natural differentiation using inner product\n",
"# Make Oracle_development_module to compare two vector field\n",
"dev = Oracle_development_module()\n",
"# Load development flow\n",
"dev.load_differentiation_reference_data(gradient_object=gradient)\n",
"# Load simulation result\n",
"dev.load_perturb_simulation_data(oracle_object=oracle)\n",
"# Calculate inner produc scores\n",
"dev.calculate_inner_product()\n",
"dev.calculate_digitized_ip(n_bins=10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Show perturbation scores\n",
"vm = 0.2\n",
"fig, ax = plt.subplots(1, 2, figsize=[12, 5])\n",
"dev.plot_inner_product_on_grid(vm=0.02, s=50, ax=ax[0])\n",
"ax[0].set_title(f\"PS calculated with FCGR2B KO\")\n",
"dev.plot_inner_product_random_on_grid(vm=vm, s=50, ax=ax[1])\n",
"ax[1].set_title(f\"PS calculated with Randomized simulation vector\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Show perturbation scores with perturbation simulation vector field\n",
"fig, ax = plt.subplots(figsize=[6, 6])\n",
"dev.plot_inner_product_on_grid(vm=0.05, s=50, ax=ax)\n",
"dev.plot_simulation_flow_on_grid(scale=scale_simulation, show_background=False, ax=ax)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# summary plots\n",
"# visualize the results\n",
"dev.visualize_development_module_layout_0(s=5, scale_for_simulation=scale_simulation, s_grid=50, scale_for_pseudotime=scale_dev, vm=0.05)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:postsc]",
"language": "python",
"name": "conda-env-postsc-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
This diff is collapsed.
...@@ -2,9 +2,318 @@ suppressPackageStartupMessages(library(Seurat)) ...@@ -2,9 +2,318 @@ suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(monocle3)) suppressPackageStartupMessages(library(monocle3))
suppressPackageStartupMessages(library(ggplot2)) suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(dplyr)) suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(ComplexHeatmap))
# ED_Fig5B ---------------------------------------------------------------- # ED_Fig5B ----------------------------------------------------------------
## load scRNAseq data from public dataset GSE162950
load("Scarfo_HEC2023/scRNAseq/seurat_object.Rdata")
sample <- SetIdent(sample, value = sample@meta.data$seurat_clusters)
DimPlot(sample, label=TRUE, group.by = "seurat_clusters")
sample@meta.data[["orig.ident"]] <- factor(sample@meta.data[["orig.ident"]],
levels = c("AGM_CS10_Liu",
"AGM_CS11_Liu",
"AGM_CS13_Liu",
"AGM_4wk_658",
"AGM_5wk_555",
"AGM_5wk_575",
"AGM_6wk_563",
"YolkSac_CS11_Liu"))
DimPlot(sample,
label=F,
repel = T,
reduction="umap",
group.by="orig.ident",
pt.size=0.1,
cols = c("AGM_CS10_Liu" = "#00BFC4",
"AGM_CS11_Liu" = "#00A9FF",
"AGM_CS13_Liu" = "#C77CFF",
"AGM_4wk_658" = "#F8766D",
"AGM_5wk_555" = "#CD9600",
"AGM_5wk_575" = "#7CAE00",
"AGM_6wk_563" = "#00BE67",
"YolkSac_CS11_Liu" = "#FF61CC"))
# ED_Fig5C ----------------------------------------------------------------
pal <- viridis(n = 10, option = "D")
FeaturePlot(sample,
features = c("HOXA9", "HOXA10"),
ncol = 2,
repel = T,
pt.size = 0.05,
order = T,
cols = pal)
# ED_Fig5E ----------------------------------------------------------------
install_github("sturgeonlab/Luff-etal-2021/SingleR/versioncontrolscripts")
library(versioncontrolscripts)
## load reference dataset
cd32_dll4 <- read.table("Scarfo_HEC2023/BulkRNAseq_2/featureCounts_results_tpm.txt",
header=T, sep="\t")
## load Seurat data
load("Scarfo_HEC2023/scRNAseq/seurat_object.Rdata")
sample <- SetIdent(sample, value = sample@meta.data$seurat_clusters)
sample_noYS <- subset(sample, orig.ident != "YolkSac_CS11_Liu")
sample_noYS@meta.data[["orig.ident"]] <-
factor(
sample_noYS@meta.data[["orig.ident"]],
levels = c(
"AGM_CS10_Liu",
"AGM_CS11_Liu",
"AGM_CS13_Liu",
"AGM_4wk_658",
"AGM_5wk_555",
"AGM_5wk_575",
"AGM_6wk_563"
)
)
## defining HEC and AEC
aec <- WhichCells(sample_noYS,
expression = CDH5 > 0.2 &
CXCR4 > 0.3 &
GJA5 > 0.2 &
DLL4 > 0.1 &
PTPRC < 0.1 &
SPN < 0.05 &
HEY2 > 0.15)
hec <- WhichCells(sample_noYS,
expression = RUNX1 > 0.1 &
CDH5 > 0.2 &
HOXA9 > 0.05 &
PTPRC < 0.1 &
ITGA2B < 0.1 &
SPN < 0.05)
Idents(sample_noYS, cells=aec) = "AEC"
Idents(sample_noYS, cells=hec) = "HEC"
sample_noYS$CellType <- Idents(sample_noYS)
sample_noYS <- subset(sample_noYS, CellType=="AEC" | CellType=="HEC")
Idents(sample_noYS) <- "CellType"
## setting up reference dataset
bulk <- cd32_dll4[,-c(2:6)]
bulk <- bulk[,c(1,2,3,4,6,7,9)]
colnames(bulk) <- c("genes",
"RS227_CD184","RS227_CD32_plus",
"RS242_CD184","RS242_CD32_plus",
"RS243_CD184","RS243_CD32_plus")
bulk$CD184 <- rowMeans(bulk[,c(2,4,6)])
bulk$CD32_plus <- rowMeans(bulk[,c(3,5,7)])
bulk <- bulk[,c(1,9,8)]
types = list("CD32_plus","CD184")
main_types = list("CD32_plus","CD184")
x = bulk[,1]
loss = bulk[,-1]
rownames(loss) = make.names(x, unique = TRUE)
bulk <- loss
bulk = as.matrix(bulk)
types = as.character(types)
main_types = as.character(main_types)
myref = list(data = bulk, main_types = main_types, types = types)
myref[["de.genes"]] = CreateVariableGeneSet(bulk,types,200)
myref[["de.genes.main"]] = CreateVariableGeneSet(bulk,main_types,300)
labels <- sample_noYS@meta.data$orig.ident
stage <- sample_noYS@meta.data$CellType
stage2 <- data.frame(stage)
stage2$stage <- as.character(stage2$stage)
stage2 <- stage2$stage
# create singlecell dataset for comparison to bulk RNAseq
data <- GetAssayData(sample_noYS, assay = "RNA", slot = "data")
data <- as.matrix(data)
# run main SingleR function & add names for cluster
obj <- SingleR.CreateObject(data,
myref,
clusters = NULL,
variable.genes = "de",
fine.tune = F)
obj[["names"]] <- stage2
obj[["names2"]] <- labels
data <- t(obj$SingleR.single.main$r)
cell_types = as.data.frame(obj$names)
colnames(cell_types) = 'CellTypes'
rownames(cell_types) = colnames(data)
test <- as.data.frame(obj$names2)
colnames(test) <- "Samples"
rownames(test) <- colnames(data)
clusters <- cbind(cell_types, test)
breaksList = seq(0, 1, by = 0.01)
# export relative correlation scores and names for each cell in cs dataset
scores = obj$SingleR.single.main$r
datascores <- as.matrix(scores)
scores <- data.frame(datascores)
type <- data.frame(obj$names)
cells <- data.frame(obj$names2)
scores$type <- type$obj.names
scores$cells <- cells$obj.names2
#successive sorting orders cells in aesthetically-pleasing manner
for (k in unique(scores$cells)) {
clu <- filter(scores, cells == k)
clu <- clu[order(clu$CD184), ]
clu <- clu[order(clu$CD32_plus), ]
clu_cells <- rownames(clu)
assign(paste0("clu",k,"_cells"),clu_cells)
}
## reorder both clusters and HEC and AEC cells
agm_4 <- filter(scores, cells == "AGM_4wk_658")
agm_4_hec <- filter(agm_4, type == "HEC")
agm_4_hec <- agm_4_hec[order(agm_4_hec$CD184),]
agm_4_hec <- agm_4_hec[order(agm_4_hec$CD32_plus),]
agm_4_aec <- filter(agm_4, type == "AEC")
agm_4_aec <- agm_4_aec[order(agm_4_aec$CD184),]
agm_4_aec <- agm_4_aec[order(agm_4_aec$CD32_plus),]
agm_5_1 <- filter(scores, cells == "AGM_5wk_555")
agm_5_1_hec <- filter(agm_5_1, type == "HEC")
agm_5_1_hec <- agm_5_1_hec[order(agm_5_1_hec$CD184),]
agm_5_1_hec <- agm_5_1_hec[order(agm_5_1_hec$CD32_plus),]
agm_5_1_aec <- filter(agm_5_1, type == "AEC")
agm_5_1_aec <- agm_5_1_aec[order(agm_5_1_aec$CD184),]
agm_5_1_aec <- agm_5_1_aec[order(agm_5_1_aec$CD32_plus),]
agm_5_2 <- filter(scores, cells == "AGM_5wk_575")
agm_5_2_hec <- filter(agm_5_2, type == "HEC")
agm_5_2_hec <- agm_5_2_hec[order(agm_5_2_hec$CD184),]
agm_5_2_hec <- agm_5_2_hec[order(agm_5_2_hec$CD32_plus),]
agm_5_2_aec <- filter(agm_5_2, type == "AEC")
agm_5_2_aec <- agm_5_2_aec[order(agm_5_2_aec$CD184),]
agm_5_2_aec <- agm_5_2_aec[order(agm_5_2_aec$CD32_plus),]
agm_6 <- filter(scores, cells == "AGM_6wk_563")
agm_6_hec <- filter(agm_6, type == "HEC")
agm_6_hec <- agm_6_hec[order(agm_6_hec$CD184),]
agm_6_hec <- agm_6_hec[order(agm_6_hec$CD32_plus),]
agm_6_aec <- filter(agm_6, type == "AEC")
agm_6_aec <- agm_6_aec[order(agm_6_aec$CD184),]
agm_6_aec <- agm_6_aec[order(agm_6_aec$CD32_plus),]
agm_cs10 <- filter(scores, cells == "AGM_CS10_Liu")
agm_cs10_hec <- filter(agm_cs10, type == "HEC")
agm_cs10_hec <- agm_cs10_hec[order(agm_cs10_hec$CD184),]
agm_cs10_hec <- agm_cs10_hec[order(agm_cs10_hec$CD32_plus),]
agm_cs10_aec <- filter(agm_cs10, type == "AEC")
agm_cs10_aec <- agm_cs10_aec[order(agm_cs10_aec$CD184),]
agm_cs10_aec <- agm_cs10_aec[order(agm_cs10_aec$CD32_plus),]
agm_cs11 <- filter(scores, cells == "AGM_CS11_Liu")
agm_cs11_hec <- filter(agm_cs11, type == "HEC")
agm_cs11_hec <- agm_cs11_hec[order(agm_cs11_hec$CD184),]
agm_cs11_hec <- agm_cs11_hec[order(agm_cs11_hec$CD32_plus),]
agm_cs11_aec <- filter(agm_cs11, type == "AEC")
agm_cs11_aec <- agm_cs11_aec[order(agm_cs11_aec$CD184),]
agm_cs11_aec <- agm_cs11_aec[order(agm_cs11_aec$CD32_plus),]
agm_cs13 <- filter(scores, cells == "AGM_CS13_Liu")
agm_cs13_hec <- filter(agm_cs13, type == "HEC")
agm_cs13_hec <- agm_cs13_hec[order(agm_cs13_hec$CD184),]
agm_cs13_hec <- agm_cs13_hec[order(agm_cs13_hec$CD32_plus),]
agm_cs13_aec <- filter(agm_cs13, type == "AEC")
agm_cs13_aec <- agm_cs13_aec[order(agm_cs13_aec$CD184),]
agm_cs13_aec <- agm_cs13_aec[order(agm_cs13_aec$CD32_plus),]
agm_4_hec.cells <- row.names(agm_4_hec)
agm_4_aec.cells <- row.names(agm_4_aec)
agm_5_1_hec.cells <- row.names(agm_5_1_hec)
agm_5_1_aec.cells <- row.names(agm_5_1_aec)
agm_5_2_hec.cells <- row.names(agm_5_2_hec)
agm_5_2_aec.cells <- row.names(agm_5_2_aec)
agm_6_hec.cells <- row.names(agm_6_hec)
agm_6_aec.cells <- row.names(agm_6_aec)
agm_cs10_hec.cells <- row.names(agm_cs10_hec)
agm_cs10_aec.cells <- row.names(agm_cs10_aec)
agm_cs11_hec.cells <- row.names(agm_cs11_hec)
agm_cs11_aec.cells <- row.names(agm_cs11_aec)
agm_cs13_hec.cells <- row.names(agm_cs13_hec)
agm_cs13_aec.cells <- row.names(agm_cs13_aec)
order <- c(
agm_cs10_aec.cells,
agm_cs11_aec.cells,
agm_cs13_aec.cells,
agm_4_aec.cells,
agm_5_1_aec.cells,
agm_5_2_aec.cells,
agm_6_aec.cells,
agm_cs10_hec.cells,
agm_cs11_hec.cells,
agm_cs13_hec.cells,
agm_4_hec.cells,
agm_5_1_hec.cells,
agm_5_2_hec.cells,
agm_6_hec.cells
)
data_reorder <- data[,match(order, colnames(data))]
annot_colors <- list(CellTypes=c(HEC = "red",
AEC = "royalblue3"),
Samples = c(AGM_CS10_Liu = "#00BFC4",
AGM_CS11_Liu = "#00A9FF",
AGM_CS13_Liu = "#C77CFF",
AGM_4wk_658 = "#F8766D",
AGM_5wk_555 = "#CD9600",
AGM_5wk_575 = "#7CAE00",
AGM_6wk_563 = "#00BE67"))
annot <- clusters
colours <- annot_colors
colAnn <- HeatmapAnnotation(df = clusters,
which = 'col',
show_annotation_name = T,
col = annot_colors,
annotation_legend_param = list(
CellTypes = list(
title = "Cell Type",
title_gp = gpar(fontsize = 14*96/72,
fontface = "bold"),
labels_gp = gpar(fontsize = 14*96/72),
grid_height = unit(0.7, "cm")),
Samples = list(
title = "Samples",
title_gp = gpar(fontsize = 14*96/72,
fontface = "bold"),
labels_gp = gpar(fontsize = 14*96/72),
grid_height = unit(0.7, "cm"))),
annotation_width = unit(c(1, 4), 'cm'),
gap = unit(1, 'mm'))
counts_scaled <- t(scale(t(data_reorder)))
counts_scaled_ordered <- counts_scaled[, match(rownames(clusters),colnames(counts_scaled))]
clusters$DF <- paste(clusters$CellTypes, clusters$Samples, sep = "_")
dd <- cluster_within_group(counts_scaled_ordered, clusters$DF)
hmap <- Heatmap(counts_scaled_ordered,
name = "Z-score",
show_column_names = F,
show_column_dend = F,
show_parent_dend_line = F,
show_row_dend = F,
cluster_rows = F,
cluster_columns = dd,
column_gap = unit(c(2,2,5,2,5,2,5,2,
2,5,5,2,5), "mm"),
top_annotation=colAnn,
column_split = 14,
clustering_method_columns = "ward.D2",
heatmap_legend_param = list(
title = "Z-Scores",
title_gp = gpar(fontsize = 14*96/72,
fontface = "bold"),
labels_gp = gpar(fontsize = 12*96/72),
legend_height = unit(2, "cm")
),
width = unit(15*96/72, "cm"),
height = unit(5*96/72, "cm"),
use_raster = TRUE, # for raster quality, needs to be tested
raster_quality = 5)
draw(hmap, heatmap_legend_side="right", annotation_legend_side="right")
# ED_Fig5F ----------------------------------------------------------------
## load scRNAseq data from public dataset GSE162950 ## load scRNAseq data from public dataset GSE162950
load("Scarfo_HEC2023/scRNAseq/seurat_object.Rdata") load("Scarfo_HEC2023/scRNAseq/seurat_object.Rdata")
EHT_scorecard <- c("CXCR4","SOX17","GJA5","MECOM","HOXA9","SPN","CD44","ITGA2B","HLF","GFI1","MLLT3","KCNK17","MYB","STAT5A","SMIM24","RAB27B","SPINK2") EHT_scorecard <- c("CXCR4","SOX17","GJA5","MECOM","HOXA9","SPN","CD44","ITGA2B","HLF","GFI1","MLLT3","KCNK17","MYB","STAT5A","SMIM24","RAB27B","SPINK2")
...@@ -32,110 +341,10 @@ DotPlot(sample_sub, ...@@ -32,110 +341,10 @@ DotPlot(sample_sub,
dot.scale = 10) + dot.scale = 10) +
coord_flip() coord_flip()
# ED_Fig5C ------------------------------------------------------------------
## load Seurat data
sample <- readRDS("Scarfo_HEC2023/scRNAseq/WNTd_HE.rds")
sample@meta.data[["Cell.Annotations"]] <- recode_factor(sample@meta.data[["RNA_snn_res.0.6"]],
"0" = "HECs",
"1" = "HECs",
"2" = "HECs",
"3" = "Venous cells",
"4" = "Venous cells",
"5" = "Arterial ECs",
"6" = "Arterial ECs",
"7" = "Venous cells",
"8" = "ECs (M phase)",
"9" = "Venous cells",
"10" = "EndoMT cells",
"11" = "HECs",
"12" = "Arterial ECs",
"13" = "Lymphatic ECs",
"14" = "Arterial ECs",
"15" = "Allantois/Placenta",
"16" = "HECs",
"17" = "HECs",
"18" = "EndoMT cells",
"19" = "SM cells",
"20" = "EndoMT cells",
"21" = "EndoMT cells")
DimPlot(object = sample,
reduction = "umap",
group.by = "Cell.Annotations",
label = F,
repel = T,
pt.size = 0.8,
label.size = 6,
cols = c('HECs' = "#377EB8",
'Lymphatic ECs' = "#4DAF4A",
'Arterial ECs' = "#E41A1C",
'Venous cells' = "#FF7F00",
'EndoMT cells' = "#E6AB02",
'Allantois/Placenta' = "#984EA3",
'SM cells' = "#A6761D",
"ECs (M phase)" = "#A50026"))
# ED_Fig5D - ED_Fig5E ------------------------------------------------------------------
FeaturePlot(object = sample,
features = c("RUNX1", "FCGR2B"),
cols = c("grey90","blue"),
order = T,
pt.size = 0.1)
# ED_Fig5F ------------------------------------------------------------------
runx1 <- readRDS("Scarfo_HEC2023/scRNAseq/RUNX1_clusters.rds")
## Monocle3
Idents(runx1) <- "RNA_snn_res.0.6"
start = WhichCells(runx1,idents = "0")
expression_matrix <- runx1@assays[["RNA"]]@counts
cell_metadata <- runx1@meta.data
cell_metadata$orig.ident <- factor(cell_metadata$orig.ident, levels = unique(cell_metadata$orig.ident))
gene_annotation <- data.frame("gene_short_name" = rownames(runx1))
rownames(gene_annotation) <- gene_annotation$gene_short_name
cds <- new_cell_data_set(expression_data = expression_matrix,
cell_metadata = cell_metadata,
gene_metadata = gene_annotation)
cds <- preprocess_cds(cds, num_dim = 50)
cds <- reduce_dimension(cds)
## Construct and assign the made up partition
recreate.partition <- c(rep(1, length(cds@colData@rownames)))
names(recreate.partition) <- cds@colData@rownames
recreate.partition <- as.factor(recreate.partition)
cds@clusters@listData[["UMAP"]][["partitions"]] <- recreate.partition
## Assign the cluster info
list_cluster <- runx1@meta.data[["RNA_snn_res.0.6"]]
names(list_cluster) <- runx1@assays[["RNA"]]@data@Dimnames[[2]]
cds@clusters@listData[["UMAP"]][["clusters"]] <- list_cluster
cds@clusters@listData[["UMAP"]][["louvain_res"]] <- "NA"
## Assign UMAP coordinate
reducedDims(cds)[["UMAP"]] <- runx1@reductions[["umap"]]@cell.embeddings
### Reset colnames and rownames (consequence of UMAP replacement)
rownames(cds@principal_graph_aux[['UMAP']]$dp_mst) <- NULL
## Learn Graph
cds <- learn_graph(cds = cds,use_partition = T,learn_graph_control=list(ncenter=220,minimal_branch_len=15),verbose = T)
cds <- order_cells(cds, root_cells = start)
plot_cells(cds,
color_cells_by = "pseudotime",
label_groups_by_cluster=FALSE,
label_leaves=T,
label_branch_points=T,
group_label_size = 8,
graph_label_size = 3,
cell_size = 1,
trajectory_graph_segment_size = 1)
# ED_Fig5G ------------------------------------------------------------------
cds <- estimate_size_factors(cds)
gi <- c("H19","KCNK17","RUNX1", "MYB", "SPN")
cds_gi <- cds[rowData(cds)$gene_short_name %in% gi,]
plot_genes_in_pseudotime(cds_subset = cds_gi,
ncol = 3,
color_cells_by = "RNA_snn_res.0.6",
min_expr = NULL,
cell_size = 1)
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Import libraries"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import celloracle as co\n",
"from celloracle.applications import Pseudotime_calculator\n",
"from celloracle.applications import Gradient_calculator\n",
"from celloracle.applications import Oracle_development_module\n",
"import anndata2ri\n",
"import os\n",
"import sys\n",
"import time\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import scanpy as sc\n",
"import seaborn as sns\n",
"import copy\n",
"import glob\n",
"import time\n",
"import shutil\n",
"from tqdm.auto import tqdm"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plotting parameters settings"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# visualization settings\n",
"%config InlineBackend.figure_format = 'retina'\n",
"%matplotlib inline\n",
"plt.rcParams['figure.figsize'] = [6, 4.5]\n",
"plt.rcParams[\"savefig.dpi\"] = 300"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"save_folder = \"Scarfo_HEC2023/scRNAseq\"\n",
"os.makedirs(save_folder, exist_ok=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Seurat object in Python"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Activate the anndata2ri conversion between SingleCellExperiment and AnnData\n",
"anndata2ri.activate()\n",
"#Loading the rpy2 extension\n",
"%load_ext rpy2.ipython\n",
"sc.settings.verbosity = 3\n",
"sc.logging.print_versions()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%R\n",
"suppressPackageStartupMessages(library(Seurat))\n",
"runx1_obj <- readRDS(\"Scarfo_HEC2023/scRNAseq/RUNX1_clusters.rds\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%R -o runx1_obj_sce\n",
"#convert the Seurat object to a SingleCellExperiment object\n",
"runx1_obj_sce <- as.SingleCellExperiment(runx1_obj)\n",
"runx1_obj_sce"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"adata = runx1_obj_sce\n",
"adata"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(f\"Cell number is :{adata.shape[0]}\")\n",
"print(f\"Gene number is :{adata.shape[1]}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sc.pl.umap(adata, color='RNA_snn_new_res.0.6')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Constructing GRN"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Select top 3000 highly-variable genes\n",
"filter_result = sc.pp.filter_genes_dispersion(adata.X,flavor='cell_ranger',n_top_genes=3000,log=False)\n",
"# Subset the genes\n",
"adata = adata[:, filter_result.gene_subset]\n",
"print(f\"Cell number is :{adata.shape[0]}\")\n",
"print(f\"Gene number is :{adata.shape[1]}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load data from CellTalkDB\n",
"ligand_receptor = pd.read_csv(\"Scarfo_HEC2023/scRNAseq/CellTalkDB_modified.txt\", sep='\\t')\n",
"ligand_receptor.columns = ['Ligand','Receptor']\n",
"ligand_receptor\n",
"print('FCGR2B' in ligand_receptor['Ligand'].unique())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Make dictionary: dictionary key is Ligand and dictionary value is list of target genes.\n",
"LR_to_TG_dictionary = {}\n",
"for LR, TGs in zip(ligand_receptor.Ligand, ligand_receptor.Receptor):\n",
" TG_list = TGs.replace(\" \", \"\").split(\",\")\n",
" LR_to_TG_dictionary[LR] = TG_list\n",
"TG_to_LR_dictionary = co.utility.inverse_dictionary(LR_to_TG_dictionary)\n",
"TG_to_LR_dictionary"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Instantiate Oracle object\n",
"oracle = co.Oracle()\n",
"oracle.import_anndata_as_raw_count(adata=adata,cluster_column_name=\"RNA_snn_new_res.0.6\",embedding_name=\"X_umap\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Add LR information \n",
"oracle.addTFinfo_dictionary(TG_to_LR_dictionary)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Perform PCA\n",
"oracle.perform_PCA()\n",
"# Select important PCs\n",
"plt.plot(np.cumsum(oracle.pca.explained_variance_ratio_)[:100])\n",
"n_comps = np.where(np.diff(np.diff(np.cumsum(oracle.pca.explained_variance_ratio_))>0.002))[0][0]\n",
"plt.axvline(n_comps, c=\"k\")\n",
"plt.show()\n",
"print(n_comps)\n",
"n_comps = min(n_comps, 50)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Estimate the optimal number of nearest neighbors for KNN imputation\n",
"n_cell = oracle.adata.shape[0]\n",
"print(f\"cell number is :{n_cell}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"k = int(0.025*n_cell)\n",
"print(f\"Auto-selected k is :{k}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# perform KNN imputation\n",
"oracle.knn_imputation(n_pca_dims=n_comps,k=k, balanced=True,b_sight=k*8,b_maxl=k*4,n_jobs=4)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# GRN calculation\n",
"%time\n",
"links = oracle.get_links(cluster_name_for_GRN_unit=\"RNA_snn_new_res.0.6\", alpha=10,verbose_level=10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"links.links_dict.keys()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# filter weak edges\n",
"links.filter_links(p=0.001, weight=\"coef_abs\", threshold_number=2000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# network degree distribution\n",
"links.plot_degree_distributions(plot_model=True,\n",
" save=f\"{save_folder}/degree_distribution/\",)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Calculate network scores.\n",
"links.get_network_score()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Adding pseudotime to oracle object"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Instantiate pseudotime object using oracle object.\n",
"pt = Pseudotime_calculator(oracle_object=oracle)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"clusters_in_RUNX1_lineage = ['0','1','2','11','16','17']\n",
"# Make a dictionary\n",
"lineage_dictionary = {\"Lineage_RUNX1\": clusters_in_RUNX1_lineage}\n",
"# Input lineage information into pseudotime object\n",
"pt.set_lineage(lineage_dictionary=lineage_dictionary)\n",
"# Visualize lineage information\n",
"pt.plot_lineages()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# add root cell information\n",
"# plotly is required\n",
"try:\n",
" import plotly.express as px\n",
" def plot(adata, embedding_key, cluster_column_name):\n",
" embedding = adata.obsm[embedding_key]\n",
" df = pd.DataFrame(embedding, columns=[\"x\", \"y\"])\n",
" df[\"cluster\"] = adata.obs[cluster_column_name].values\n",
" df[\"label\"] = adata.obs.index.values\n",
" fig = px.scatter(df, x=\"x\", y=\"y\", hover_name=df[\"label\"], color=\"cluster\")\n",
" fig.show()\n",
" plot(adata=pt.adata,\n",
" embedding_key=pt.obsm_key,\n",
" cluster_column_name=pt.cluster_column_name)\n",
"except:\n",
" print(\"Plotly not found in your environment. Did you install plotly?\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Estimated root cell name for each lineage\n",
"root_cells = {\"Lineage_RUNX1\": \"Sample1_GGAGAACTCGCGGTAC-1\"}\n",
"pt.set_root_cells(root_cells=root_cells)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check diffusion map data.\n",
"\"X_diffmap\" in pt.adata.obsm"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# calculate diffusion map\n",
"sc.pp.neighbors(pt.adata, n_neighbors=30)\n",
"sc.tl.diffmap(pt.adata)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Calculate pseudotime\n",
"pt.get_pseudotime_per_each_lineage()\n",
"# Check results\n",
"pt.plot_pseudotime(cmap=\"rainbow\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Add calculated pseudotime data to the oracle object\n",
"oracle.adata.obs = pt.adata.obs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## In silico perturbation analysis"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# make pridictive models for simulation and fit the ridge regression models again\n",
"links.filter_links()\n",
"oracle.get_cluster_specific_TFdict_from_Links(links_object=links)\n",
"oracle.fit_GRN_for_simulation(alpha=10, use_cluster_specific_TFdict=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"goi = \"FCGR2B\"\n",
"# Enter perturbation conditions to simulate signal propagation after the perturbation.\n",
"oracle.simulate_shift(perturb_condition={goi: 0.0}, n_propagation=3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Get transition probability\n",
"oracle.estimate_transition_prob(n_neighbors=200,knn_random=True,sampled_fraction=1)\n",
"# Calculate embedding\n",
"oracle.calculate_embedding_shift(sigma_corr=0.05)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# quiver plots\n",
"fig, ax = plt.subplots(1, 2, figsize=[13, 6])\n",
"scale = 25\n",
"# Show quiver plot\n",
"oracle.plot_quiver(scale=scale, ax=ax[0])\n",
"ax[0].set_title(f\"Simulated cell identity shift vector: {goi} KO\")\n",
"# Show quiver plot that was calculated with randomized graph.\n",
"oracle.plot_quiver_random(scale=scale, ax=ax[1])\n",
"ax[1].set_title(f\"Randomized simulation vector\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# establishing digitalized grids\n",
"n_grid = 40\n",
"oracle.calculate_p_mass(smooth=0.8, n_grid=n_grid, n_neighbors=200)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Search for best min_mass.\n",
"oracle.suggest_mass_thresholds(n_suggestion=12)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"min_mass = 19\n",
"oracle.calculate_mass_filter(min_mass=min_mass, plot=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# shift vector plots\n",
"fig, ax = plt.subplots(1, 2, figsize=[13, 5])\n",
"scale_simulation = 2\n",
"# Show quiver plot\n",
"oracle.plot_simulation_flow_on_grid(scale=scale_simulation, ax=ax[0])\n",
"ax[0].set_title(f\"Simulated cell identity shift vector: {goi} KO\")\n",
"# Show quiver plot that was calculated with randomized graph.\n",
"oracle.plot_simulation_flow_random_on_grid(scale=scale_simulation, ax=ax[1])\n",
"ax[1].set_title(f\"Randomized simulation vector\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Plot vector field with cell cluster\n",
"fig, ax = plt.subplots(figsize=[8, 8])\n",
"oracle.plot_cluster_whole(ax=ax, s=10)\n",
"oracle.plot_simulation_flow_on_grid(scale=scale_simulation, ax=ax, show_background=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Visualize pseudotime\n",
"fig, ax = plt.subplots(figsize=[6,6])\n",
"sc.pl.embedding(adata=oracle.adata, basis=oracle.embedding_name, ax=ax, cmap=\"viridis\",\n",
" color=[\"Pseudotime\"], save=\"pseudotime.pdf\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Instantiate Gradient calculator object\n",
"gradient = Gradient_calculator(oracle_object=oracle, pseudotime_key=\"Pseudotime\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"gradient.calculate_p_mass(smooth=0.8, n_grid=n_grid, n_neighbors=200)\n",
"gradient.calculate_mass_filter(min_mass=min_mass, plot=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# convert pseudotime intop grid points\n",
"gradient.transfer_data_into_grid(args={\"method\": \"polynomial\", \"n_poly\":5}, plot=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Calculate graddient\n",
"gradient.calculate_gradient()\n",
"# Show results\n",
"scale_dev = 20\n",
"gradient.visualize_results(scale=scale_dev, s=5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Visualize results\n",
"fig, ax = plt.subplots(figsize=[6, 6])\n",
"gradient.plot_dev_flow_on_grid(scale=scale_dev, ax=ax)\n",
"ax.set_title(f\"Normal Developmental flow\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# quantitatively compare the directionality and size of vectors between perturbation simulation and natural differentiation using inner product\n",
"# Make Oracle_development_module to compare two vector field\n",
"dev = Oracle_development_module()\n",
"# Load development flow\n",
"dev.load_differentiation_reference_data(gradient_object=gradient)\n",
"# Load simulation result\n",
"dev.load_perturb_simulation_data(oracle_object=oracle)\n",
"# Calculate inner produc scores\n",
"dev.calculate_inner_product()\n",
"dev.calculate_digitized_ip(n_bins=10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Show perturbation scores\n",
"vm = 0.2\n",
"fig, ax = plt.subplots(1, 2, figsize=[12, 5])\n",
"dev.plot_inner_product_on_grid(vm=0.02, s=50, ax=ax[0])\n",
"ax[0].set_title(f\"PS calculated with FCGR2B KO\")\n",
"dev.plot_inner_product_random_on_grid(vm=vm, s=50, ax=ax[1])\n",
"ax[1].set_title(f\"PS calculated with Randomized simulation vector\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Show perturbation scores with perturbation simulation vector field\n",
"fig, ax = plt.subplots(figsize=[6, 6])\n",
"dev.plot_inner_product_on_grid(vm=0.05, s=50, ax=ax)\n",
"dev.plot_simulation_flow_on_grid(scale=scale_simulation, show_background=False, ax=ax)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# summary plots\n",
"# visualize the results\n",
"dev.visualize_development_module_layout_0(s=5, scale_for_simulation=scale_simulation, s_grid=50, scale_for_pseudotime=scale_dev, vm=0.05)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:postsc]",
"language": "python",
"name": "conda-env-postsc-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(monocle3))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(dyno))
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(magrittr))
suppressPackageStartupMessages(library(patchwork))
# ED_Fig6A ------------------------------------------------------------------
## load Seurat data
sample <- readRDS("Scarfo_HEC2023/scRNAseq/WNTd_HE.rds")
sample@meta.data[["Cell.Annotations"]] <- recode_factor(sample@meta.data[["RNA_snn_res.0.6"]],
"0" = "HECs",
"1" = "HECs",
"2" = "HECs",
"3" = "Venous cells",
"4" = "Venous cells",
"5" = "Arterial ECs",
"6" = "Arterial ECs",
"7" = "Venous cells",
"8" = "ECs (M phase)",
"9" = "Venous cells",
"10" = "EndoMT cells",
"11" = "HECs",
"12" = "Arterial ECs",
"13" = "Lymphatic ECs",
"14" = "Arterial ECs",
"15" = "Allantois/Placenta",
"16" = "HECs",
"17" = "HECs",
"18" = "EndoMT cells",
"19" = "SM cells",
"20" = "EndoMT cells",
"21" = "EndoMT cells")
DimPlot(object = sample,
reduction = "umap",
group.by = "Cell.Annotations",
label = F,
repel = T,
pt.size = 0.8,
label.size = 6,
cols = c('HECs' = "#377EB8",
'Lymphatic ECs' = "#4DAF4A",
'Arterial ECs' = "#E41A1C",
'Venous cells' = "#FF7F00",
'EndoMT cells' = "#E6AB02",
'Allantois/Placenta' = "#984EA3",
'SM cells' = "#A6761D",
"ECs (M phase)" = "#A50026"))
# ED_Fig6B - ED_Fig6C ------------------------------------------------------------------
FeaturePlot(object = sample,
features = c("RUNX1", "FCGR2B"),
cols = c("grey90","blue"),
order = T,
pt.size = 0.1)
# ED_Fig6D ------------------------------------------------------------------
runx1 <- readRDS("Scarfo_HEC2023/scRNAseq/RUNX1_clusters.rds")
## Monocle3
Idents(runx1) <- "RNA_snn_res.0.6"
start = WhichCells(runx1,idents = "0")
expression_matrix <- runx1@assays[["RNA"]]@counts
cell_metadata <- runx1@meta.data
cell_metadata$orig.ident <- factor(cell_metadata$orig.ident, levels = unique(cell_metadata$orig.ident))
gene_annotation <- data.frame("gene_short_name" = rownames(runx1))
rownames(gene_annotation) <- gene_annotation$gene_short_name
cds <- new_cell_data_set(expression_data = expression_matrix,
cell_metadata = cell_metadata,
gene_metadata = gene_annotation)
cds <- preprocess_cds(cds, num_dim = 50)
cds <- reduce_dimension(cds)
## Construct and assign the made up partition
recreate.partition <- c(rep(1, length(cds@colData@rownames)))
names(recreate.partition) <- cds@colData@rownames
recreate.partition <- as.factor(recreate.partition)
cds@clusters@listData[["UMAP"]][["partitions"]] <- recreate.partition
## Assign the cluster info
list_cluster <- runx1@meta.data[["RNA_snn_res.0.6"]]
names(list_cluster) <- runx1@assays[["RNA"]]@data@Dimnames[[2]]
cds@clusters@listData[["UMAP"]][["clusters"]] <- list_cluster
cds@clusters@listData[["UMAP"]][["louvain_res"]] <- "NA"
## Assign UMAP coordinate
reducedDims(cds)[["UMAP"]] <- runx1@reductions[["umap"]]@cell.embeddings
### Reset colnames and rownames (consequence of UMAP replacement)
rownames(cds@principal_graph_aux[['UMAP']]$dp_mst) <- NULL
## Learn Graph
cds <- learn_graph(cds = cds,use_partition = T,learn_graph_control=list(ncenter=220,minimal_branch_len=15),verbose = T)
cds <- order_cells(cds, root_cells = start)
plot_cells(cds,
color_cells_by = "pseudotime",
label_groups_by_cluster=FALSE,
label_leaves=T,
label_branch_points=T,
group_label_size = 8,
graph_label_size = 3,
cell_size = 1,
trajectory_graph_segment_size = 1)
# ED_Fig6E ------------------------------------------------------------------
cds <- estimate_size_factors(cds)
gi <- c("H19","KCNK17","RUNX1", "MYB", "SPN")
cds_gi <- cds[rowData(cds)$gene_short_name %in% gi,]
plot_genes_in_pseudotime(cds_subset = cds_gi,
ncol = 3,
color_cells_by = "RNA_snn_res.0.6",
min_expr = NULL,
cell_size = 1)
# ED_Fig6F - ED_Fig6G ------------------------------------------------------------------
## load Seurat data
sample <- readRDS("Scarfo_HEC2023/scRNAseq/WNTd_HE.rds")
runx1 <- subset(sample, idents = c("0","1","2","11","16","17"))
Idents(runx1) <- "RNA_snn_new_res.0.6"
# preparing data for dyno
df <- as.matrix(runx1[["RNA"]]@data)
counts <- Matrix::t(as(as.matrix(runx1@assays$RNA@counts), 'sparseMatrix'))
expression <- Matrix::t(as(as.matrix(runx1@assays$RNA@data), 'sparseMatrix'))
dataset_runx1 <- wrap_expression(expression = expression,
counts = counts)
dataset_runx1 <- dataset_runx1 %>%
add_prior_information(start_id = "Sample1_ATCTCTATCCAATCCC-1", # from cluster 0
start_n = 0) %>%
add_grouping(grouping = runx1$RNA_snn_new_res.0.6) %>%
add_dimred(Embeddings(runx1, "umap"))
# selecting method
guidelines <- guidelines_shiny(dataset_runx1)
methods_selected <- guidelines$methods_selected
methods_selected
# running top model
model_paga_t <- infer_trajectory(dataset_runx1,
method = ti_paga_tree(),
verbose = T,
give_priors = c("start_id","start_n"))
# plotting trajectory and pseudotime scores
patchwork::wrap_plots(
plot_dimred(model_paga_t, size_cells = 0.5, grouping = dataset_runx1$grouping) +
ggtitle(" RUNX1 Clusters") +
scale_color_manual(values= c("#F8766D","#B79F00","#00BFC4","#619CFF",
"#F564E3","#00BA38")) +
labs(color = "Clusters") +
guides(color = guide_legend(override.aes = list(size = 2))),
plot_dimred(model_paga_t, size_cells = 0.5, "pseudotime", pseudotime = calculate_pseudotime(model_paga_t)) +
ggtitle("Pseudotime")
)
...@@ -9,7 +9,7 @@ suppressPackageStartupMessages(library(clusterProfiler)) ...@@ -9,7 +9,7 @@ suppressPackageStartupMessages(library(clusterProfiler))
## load Seurat data ## load Seurat data
sample <- readRDS("Scarfo_HEC2023/scRNAseq/WNTd_HE.rds") sample <- readRDS("Scarfo_HEC2023/scRNAseq/WNTd_HE.rds")
# Fig5A ------------------------------------------------------------------ # Fig4A ------------------------------------------------------------------
DimPlot(object = sample, DimPlot(object = sample,
reduction = "umap", reduction = "umap",
...@@ -72,7 +72,7 @@ DimPlot(object = runx1, ...@@ -72,7 +72,7 @@ DimPlot(object = runx1,
label.size = 8) label.size = 8)
# Fig5B ---------------------------------------------------------------- # Fig4B ----------------------------------------------------------------
## Monocle3 ## Monocle3
Idents(runx1) <- "RNA_snn_res.0.6" Idents(runx1) <- "RNA_snn_res.0.6"
...@@ -114,7 +114,7 @@ plot_cells(cds, ...@@ -114,7 +114,7 @@ plot_cells(cds,
cell_size = 1) cell_size = 1)
# Fig5C ------------------------------------------------------------------- # Fig4C -------------------------------------------------------------------
## differential markers ## differential markers
degs_clu11_vs_clu0_1_2 <- FindMarkers(object = runx1, ident.1 = 11, ident.2 = c(0,1,2), only.pos = FALSE, min.pct = 0, test.use = "wilcox", logfc.threshold = 0, min.cells.group = 0) degs_clu11_vs_clu0_1_2 <- FindMarkers(object = runx1, ident.1 = 11, ident.2 = c(0,1,2), only.pos = FALSE, min.pct = 0, test.use = "wilcox", logfc.threshold = 0, min.cells.group = 0)
...@@ -171,7 +171,7 @@ for (i in comp) { ...@@ -171,7 +171,7 @@ for (i in comp) {
assign(paste0("p_",i), p) assign(paste0("p_",i), p)
} }
# Fig5D ------------------------------------------------------------------- # Fig4D -------------------------------------------------------------------
runx1@meta.data[["Phase"]] <- recode_factor(runx1@meta.data[["Phase"]], runx1@meta.data[["Phase"]] <- recode_factor(runx1@meta.data[["Phase"]],
"G1" = "G1", "G1" = "G1",
...@@ -243,7 +243,7 @@ for (i in cc.clusters) { ...@@ -243,7 +243,7 @@ for (i in cc.clusters) {
} }
p_final <- cc_umap / (`p_clu_0,1,2` | p_clu_11 | `p_clu_16,17`) p_final <- cc_umap / (`p_clu_0,1,2` | p_clu_11 | `p_clu_16,17`)
# Fig5E ------------------------------------------------------------------- # Fig4E -------------------------------------------------------------------
cds <- order_cells(cds, root_cells = start) cds <- order_cells(cds, root_cells = start)
cds <- estimate_size_factors(cds) cds <- estimate_size_factors(cds)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment