Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
custom
Scarfo HEC2023
Commits
7dcd8057
Commit
7dcd8057
authored
Dec 12, 2023
by
Monah Abou Alezz
Browse files
revisions_update_v1
parent
839b34cc
Changes
8
Expand all
Show whitespace changes
Inline
Side-by-side
BulkRNAseq_2/ED_Figure5_plots.R
→
BulkRNAseq_2/ED_Figure5
A
_plots.R
View file @
7dcd8057
...
@@ -73,6 +73,7 @@ DGE.results.sig <- subset(DGE.results.sorted,
...
@@ -73,6 +73,7 @@ DGE.results.sig <- subset(DGE.results.sorted,
FDR
<
adj.pval.thr
&
abs
(
logFC
)
>
logfc.thr
)
FDR
<
adj.pval.thr
&
abs
(
logFC
)
>
logfc.thr
)
DGEgenes
<-
rownames
(
DGE.results.sig
)
DGEgenes
<-
rownames
(
DGE.results.sig
)
# ED_Fig5A -----------------------------------------------------------------
# ED_Fig5A -----------------------------------------------------------------
## scorecard
## scorecard
...
...
BulkRNAseq_2/featureCounts_results_tpm.txt
0 → 100644
View file @
7dcd8057
This diff is collapsed.
Click to expand it.
scRNAseq/.ipynb_checkpoints/CellOracle-checkpoint.ipynb
0 → 100644
View file @
7dcd8057
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Import libraries"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import celloracle as co\n",
"from celloracle.applications import Pseudotime_calculator\n",
"from celloracle.applications import Gradient_calculator\n",
"from celloracle.applications import Oracle_development_module\n",
"import anndata2ri\n",
"import os\n",
"import sys\n",
"import time\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import scanpy as sc\n",
"import seaborn as sns\n",
"import copy\n",
"import glob\n",
"import time\n",
"import shutil\n",
"from tqdm.auto import tqdm"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plotting parameters settings"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# visualization settings\n",
"%config InlineBackend.figure_format = 'retina'\n",
"%matplotlib inline\n",
"plt.rcParams['figure.figsize'] = [6, 4.5]\n",
"plt.rcParams[\"savefig.dpi\"] = 300"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"save_folder = \"Scarfo_HEC2023/scRNAseq\"\n",
"os.makedirs(save_folder, exist_ok=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Seurat object in Python"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Activate the anndata2ri conversion between SingleCellExperiment and AnnData\n",
"anndata2ri.activate()\n",
"#Loading the rpy2 extension\n",
"%load_ext rpy2.ipython\n",
"sc.settings.verbosity = 3\n",
"sc.logging.print_versions()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%R\n",
"suppressPackageStartupMessages(library(Seurat))\n",
"runx1_obj <- readRDS(\"Scarfo_HEC2023/scRNAseq/RUNX1_clusters.rds\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%R -o runx1_obj_sce\n",
"#convert the Seurat object to a SingleCellExperiment object\n",
"runx1_obj_sce <- as.SingleCellExperiment(runx1_obj)\n",
"runx1_obj_sce"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"adata = runx1_obj_sce\n",
"adata"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(f\"Cell number is :{adata.shape[0]}\")\n",
"print(f\"Gene number is :{adata.shape[1]}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sc.pl.umap(adata, color='RNA_snn_new_res.0.6')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Constructing GRN"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Select top 3000 highly-variable genes\n",
"filter_result = sc.pp.filter_genes_dispersion(adata.X,flavor='cell_ranger',n_top_genes=3000,log=False)\n",
"# Subset the genes\n",
"adata = adata[:, filter_result.gene_subset]\n",
"print(f\"Cell number is :{adata.shape[0]}\")\n",
"print(f\"Gene number is :{adata.shape[1]}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load data from CellTalkDB\n",
"ligand_receptor = pd.read_csv(\"Scarfo_HEC2023/scRNAseq/CellTalkDB_modified.txt\", sep='\\t')\n",
"ligand_receptor.columns = ['Ligand','Receptor']\n",
"ligand_receptor\n",
"print('FCGR2B' in ligand_receptor['Ligand'].unique())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Make dictionary: dictionary key is Ligand and dictionary value is list of target genes.\n",
"LR_to_TG_dictionary = {}\n",
"for LR, TGs in zip(ligand_receptor.Ligand, ligand_receptor.Receptor):\n",
" TG_list = TGs.replace(\" \", \"\").split(\",\")\n",
" LR_to_TG_dictionary[LR] = TG_list\n",
"TG_to_LR_dictionary = co.utility.inverse_dictionary(LR_to_TG_dictionary)\n",
"TG_to_LR_dictionary"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Instantiate Oracle object\n",
"oracle = co.Oracle()\n",
"oracle.import_anndata_as_raw_count(adata=adata,cluster_column_name=\"RNA_snn_new_res.0.6\",embedding_name=\"X_umap\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Add LR information \n",
"oracle.addTFinfo_dictionary(TG_to_LR_dictionary)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Perform PCA\n",
"oracle.perform_PCA()\n",
"# Select important PCs\n",
"plt.plot(np.cumsum(oracle.pca.explained_variance_ratio_)[:100])\n",
"n_comps = np.where(np.diff(np.diff(np.cumsum(oracle.pca.explained_variance_ratio_))>0.002))[0][0]\n",
"plt.axvline(n_comps, c=\"k\")\n",
"plt.show()\n",
"print(n_comps)\n",
"n_comps = min(n_comps, 50)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Estimate the optimal number of nearest neighbors for KNN imputation\n",
"n_cell = oracle.adata.shape[0]\n",
"print(f\"cell number is :{n_cell}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"k = int(0.025*n_cell)\n",
"print(f\"Auto-selected k is :{k}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# perform KNN imputation\n",
"oracle.knn_imputation(n_pca_dims=n_comps,k=k, balanced=True,b_sight=k*8,b_maxl=k*4,n_jobs=4)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# GRN calculation\n",
"%time\n",
"links = oracle.get_links(cluster_name_for_GRN_unit=\"RNA_snn_new_res.0.6\", alpha=10,verbose_level=10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"links.links_dict.keys()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# filter weak edges\n",
"links.filter_links(p=0.001, weight=\"coef_abs\", threshold_number=2000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# network degree distribution\n",
"links.plot_degree_distributions(plot_model=True,\n",
" save=f\"{save_folder}/degree_distribution/\",)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Calculate network scores.\n",
"links.get_network_score()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Adding pseudotime to oracle object"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Instantiate pseudotime object using oracle object.\n",
"pt = Pseudotime_calculator(oracle_object=oracle)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"clusters_in_RUNX1_lineage = ['0','1','2','11','16','17']\n",
"# Make a dictionary\n",
"lineage_dictionary = {\"Lineage_RUNX1\": clusters_in_RUNX1_lineage}\n",
"# Input lineage information into pseudotime object\n",
"pt.set_lineage(lineage_dictionary=lineage_dictionary)\n",
"# Visualize lineage information\n",
"pt.plot_lineages()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# add root cell information\n",
"# plotly is required\n",
"try:\n",
" import plotly.express as px\n",
" def plot(adata, embedding_key, cluster_column_name):\n",
" embedding = adata.obsm[embedding_key]\n",
" df = pd.DataFrame(embedding, columns=[\"x\", \"y\"])\n",
" df[\"cluster\"] = adata.obs[cluster_column_name].values\n",
" df[\"label\"] = adata.obs.index.values\n",
" fig = px.scatter(df, x=\"x\", y=\"y\", hover_name=df[\"label\"], color=\"cluster\")\n",
" fig.show()\n",
" plot(adata=pt.adata,\n",
" embedding_key=pt.obsm_key,\n",
" cluster_column_name=pt.cluster_column_name)\n",
"except:\n",
" print(\"Plotly not found in your environment. Did you install plotly?\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Estimated root cell name for each lineage\n",
"root_cells = {\"Lineage_RUNX1\": \"Sample1_GGAGAACTCGCGGTAC-1\"}\n",
"pt.set_root_cells(root_cells=root_cells)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check diffusion map data.\n",
"\"X_diffmap\" in pt.adata.obsm"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# calculate diffusion map\n",
"sc.pp.neighbors(pt.adata, n_neighbors=30)\n",
"sc.tl.diffmap(pt.adata)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Calculate pseudotime\n",
"pt.get_pseudotime_per_each_lineage()\n",
"# Check results\n",
"pt.plot_pseudotime(cmap=\"rainbow\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Add calculated pseudotime data to the oracle object\n",
"oracle.adata.obs = pt.adata.obs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## In silico perturbation analysis"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# make pridictive models for simulation and fit the ridge regression models again\n",
"links.filter_links()\n",
"oracle.get_cluster_specific_TFdict_from_Links(links_object=links)\n",
"oracle.fit_GRN_for_simulation(alpha=10, use_cluster_specific_TFdict=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"goi = \"FCGR2B\"\n",
"# Enter perturbation conditions to simulate signal propagation after the perturbation.\n",
"oracle.simulate_shift(perturb_condition={goi: 0.0}, n_propagation=3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Get transition probability\n",
"oracle.estimate_transition_prob(n_neighbors=200,knn_random=True,sampled_fraction=1)\n",
"# Calculate embedding\n",
"oracle.calculate_embedding_shift(sigma_corr=0.05)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# quiver plots\n",
"fig, ax = plt.subplots(1, 2, figsize=[13, 6])\n",
"scale = 25\n",
"# Show quiver plot\n",
"oracle.plot_quiver(scale=scale, ax=ax[0])\n",
"ax[0].set_title(f\"Simulated cell identity shift vector: {goi} KO\")\n",
"# Show quiver plot that was calculated with randomized graph.\n",
"oracle.plot_quiver_random(scale=scale, ax=ax[1])\n",
"ax[1].set_title(f\"Randomized simulation vector\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# establishing digitalized grids\n",
"n_grid = 40\n",
"oracle.calculate_p_mass(smooth=0.8, n_grid=n_grid, n_neighbors=200)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Search for best min_mass.\n",
"oracle.suggest_mass_thresholds(n_suggestion=12)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"min_mass = 19\n",
"oracle.calculate_mass_filter(min_mass=min_mass, plot=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# shift vector plots\n",
"fig, ax = plt.subplots(1, 2, figsize=[13, 5])\n",
"scale_simulation = 2\n",
"# Show quiver plot\n",
"oracle.plot_simulation_flow_on_grid(scale=scale_simulation, ax=ax[0])\n",
"ax[0].set_title(f\"Simulated cell identity shift vector: {goi} KO\")\n",
"# Show quiver plot that was calculated with randomized graph.\n",
"oracle.plot_simulation_flow_random_on_grid(scale=scale_simulation, ax=ax[1])\n",
"ax[1].set_title(f\"Randomized simulation vector\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Plot vector field with cell cluster\n",
"fig, ax = plt.subplots(figsize=[8, 8])\n",
"oracle.plot_cluster_whole(ax=ax, s=10)\n",
"oracle.plot_simulation_flow_on_grid(scale=scale_simulation, ax=ax, show_background=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Visualize pseudotime\n",
"fig, ax = plt.subplots(figsize=[6,6])\n",
"sc.pl.embedding(adata=oracle.adata, basis=oracle.embedding_name, ax=ax, cmap=\"viridis\",\n",
" color=[\"Pseudotime\"], save=\"pseudotime.pdf\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Instantiate Gradient calculator object\n",
"gradient = Gradient_calculator(oracle_object=oracle, pseudotime_key=\"Pseudotime\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"gradient.calculate_p_mass(smooth=0.8, n_grid=n_grid, n_neighbors=200)\n",
"gradient.calculate_mass_filter(min_mass=min_mass, plot=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# convert pseudotime intop grid points\n",
"gradient.transfer_data_into_grid(args={\"method\": \"polynomial\", \"n_poly\":5}, plot=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Calculate graddient\n",
"gradient.calculate_gradient()\n",
"# Show results\n",
"scale_dev = 20\n",
"gradient.visualize_results(scale=scale_dev, s=5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Visualize results\n",
"fig, ax = plt.subplots(figsize=[6, 6])\n",
"gradient.plot_dev_flow_on_grid(scale=scale_dev, ax=ax)\n",
"ax.set_title(f\"Normal Developmental flow\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# quantitatively compare the directionality and size of vectors between perturbation simulation and natural differentiation using inner product\n",
"# Make Oracle_development_module to compare two vector field\n",
"dev = Oracle_development_module()\n",
"# Load development flow\n",
"dev.load_differentiation_reference_data(gradient_object=gradient)\n",
"# Load simulation result\n",
"dev.load_perturb_simulation_data(oracle_object=oracle)\n",
"# Calculate inner produc scores\n",
"dev.calculate_inner_product()\n",
"dev.calculate_digitized_ip(n_bins=10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Show perturbation scores\n",
"vm = 0.2\n",
"fig, ax = plt.subplots(1, 2, figsize=[12, 5])\n",
"dev.plot_inner_product_on_grid(vm=0.02, s=50, ax=ax[0])\n",
"ax[0].set_title(f\"PS calculated with FCGR2B KO\")\n",
"dev.plot_inner_product_random_on_grid(vm=vm, s=50, ax=ax[1])\n",
"ax[1].set_title(f\"PS calculated with Randomized simulation vector\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Show perturbation scores with perturbation simulation vector field\n",
"fig, ax = plt.subplots(figsize=[6, 6])\n",
"dev.plot_inner_product_on_grid(vm=0.05, s=50, ax=ax)\n",
"dev.plot_simulation_flow_on_grid(scale=scale_simulation, show_background=False, ax=ax)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# summary plots\n",
"# visualize the results\n",
"dev.visualize_development_module_layout_0(s=5, scale_for_simulation=scale_simulation, s_grid=50, scale_for_pseudotime=scale_dev, vm=0.05)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:postsc]",
"language": "python",
"name": "conda-env-postsc-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
scRNAseq/CellTalkDB_modified.txt
0 → 100644
View file @
7dcd8057
This diff is collapsed.
Click to expand it.
scRNAseq/ED_Figure5_plots.R
View file @
7dcd8057
...
@@ -2,9 +2,318 @@ suppressPackageStartupMessages(library(Seurat))
...
@@ -2,9 +2,318 @@ suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages
(
library
(
monocle3
))
suppressPackageStartupMessages
(
library
(
monocle3
))
suppressPackageStartupMessages
(
library
(
ggplot2
))
suppressPackageStartupMessages
(
library
(
ggplot2
))
suppressPackageStartupMessages
(
library
(
dplyr
))
suppressPackageStartupMessages
(
library
(
dplyr
))
suppressPackageStartupMessages
(
library
(
ComplexHeatmap
))
# ED_Fig5B ----------------------------------------------------------------
# ED_Fig5B ----------------------------------------------------------------
## load scRNAseq data from public dataset GSE162950
load
(
"Scarfo_HEC2023/scRNAseq/seurat_object.Rdata"
)
sample
<-
SetIdent
(
sample
,
value
=
sample
@
meta.data
$
seurat_clusters
)
DimPlot
(
sample
,
label
=
TRUE
,
group.by
=
"seurat_clusters"
)
sample
@
meta.data
[[
"orig.ident"
]]
<-
factor
(
sample
@
meta.data
[[
"orig.ident"
]],
levels
=
c
(
"AGM_CS10_Liu"
,
"AGM_CS11_Liu"
,
"AGM_CS13_Liu"
,
"AGM_4wk_658"
,
"AGM_5wk_555"
,
"AGM_5wk_575"
,
"AGM_6wk_563"
,
"YolkSac_CS11_Liu"
))
DimPlot
(
sample
,
label
=
F
,
repel
=
T
,
reduction
=
"umap"
,
group.by
=
"orig.ident"
,
pt.size
=
0.1
,
cols
=
c
(
"AGM_CS10_Liu"
=
"#00BFC4"
,
"AGM_CS11_Liu"
=
"#00A9FF"
,
"AGM_CS13_Liu"
=
"#C77CFF"
,
"AGM_4wk_658"
=
"#F8766D"
,
"AGM_5wk_555"
=
"#CD9600"
,
"AGM_5wk_575"
=
"#7CAE00"
,
"AGM_6wk_563"
=
"#00BE67"
,
"YolkSac_CS11_Liu"
=
"#FF61CC"
))
# ED_Fig5C ----------------------------------------------------------------
pal
<-
viridis
(
n
=
10
,
option
=
"D"
)
FeaturePlot
(
sample
,
features
=
c
(
"HOXA9"
,
"HOXA10"
),
ncol
=
2
,
repel
=
T
,
pt.size
=
0.05
,
order
=
T
,
cols
=
pal
)
# ED_Fig5E ----------------------------------------------------------------
install_github
(
"sturgeonlab/Luff-etal-2021/SingleR/versioncontrolscripts"
)
library
(
versioncontrolscripts
)
## load reference dataset
cd32_dll4
<-
read.table
(
"Scarfo_HEC2023/BulkRNAseq_2/featureCounts_results_tpm.txt"
,
header
=
T
,
sep
=
"\t"
)
## load Seurat data
load
(
"Scarfo_HEC2023/scRNAseq/seurat_object.Rdata"
)
sample
<-
SetIdent
(
sample
,
value
=
sample
@
meta.data
$
seurat_clusters
)
sample_noYS
<-
subset
(
sample
,
orig.ident
!=
"YolkSac_CS11_Liu"
)
sample_noYS
@
meta.data
[[
"orig.ident"
]]
<-
factor
(
sample_noYS
@
meta.data
[[
"orig.ident"
]],
levels
=
c
(
"AGM_CS10_Liu"
,
"AGM_CS11_Liu"
,
"AGM_CS13_Liu"
,
"AGM_4wk_658"
,
"AGM_5wk_555"
,
"AGM_5wk_575"
,
"AGM_6wk_563"
)
)
## defining HEC and AEC
aec
<-
WhichCells
(
sample_noYS
,
expression
=
CDH5
>
0.2
&
CXCR4
>
0.3
&
GJA5
>
0.2
&
DLL4
>
0.1
&
PTPRC
<
0.1
&
SPN
<
0.05
&
HEY2
>
0.15
)
hec
<-
WhichCells
(
sample_noYS
,
expression
=
RUNX1
>
0.1
&
CDH5
>
0.2
&
HOXA9
>
0.05
&
PTPRC
<
0.1
&
ITGA2B
<
0.1
&
SPN
<
0.05
)
Idents
(
sample_noYS
,
cells
=
aec
)
=
"AEC"
Idents
(
sample_noYS
,
cells
=
hec
)
=
"HEC"
sample_noYS
$
CellType
<-
Idents
(
sample_noYS
)
sample_noYS
<-
subset
(
sample_noYS
,
CellType
==
"AEC"
|
CellType
==
"HEC"
)
Idents
(
sample_noYS
)
<-
"CellType"
## setting up reference dataset
bulk
<-
cd32_dll4
[,
-
c
(
2
:
6
)]
bulk
<-
bulk
[,
c
(
1
,
2
,
3
,
4
,
6
,
7
,
9
)]
colnames
(
bulk
)
<-
c
(
"genes"
,
"RS227_CD184"
,
"RS227_CD32_plus"
,
"RS242_CD184"
,
"RS242_CD32_plus"
,
"RS243_CD184"
,
"RS243_CD32_plus"
)
bulk
$
CD184
<-
rowMeans
(
bulk
[,
c
(
2
,
4
,
6
)])
bulk
$
CD32_plus
<-
rowMeans
(
bulk
[,
c
(
3
,
5
,
7
)])
bulk
<-
bulk
[,
c
(
1
,
9
,
8
)]
types
=
list
(
"CD32_plus"
,
"CD184"
)
main_types
=
list
(
"CD32_plus"
,
"CD184"
)
x
=
bulk
[,
1
]
loss
=
bulk
[,
-1
]
rownames
(
loss
)
=
make.names
(
x
,
unique
=
TRUE
)
bulk
<-
loss
bulk
=
as.matrix
(
bulk
)
types
=
as.character
(
types
)
main_types
=
as.character
(
main_types
)
myref
=
list
(
data
=
bulk
,
main_types
=
main_types
,
types
=
types
)
myref
[[
"de.genes"
]]
=
CreateVariableGeneSet
(
bulk
,
types
,
200
)
myref
[[
"de.genes.main"
]]
=
CreateVariableGeneSet
(
bulk
,
main_types
,
300
)
labels
<-
sample_noYS
@
meta.data
$
orig.ident
stage
<-
sample_noYS
@
meta.data
$
CellType
stage2
<-
data.frame
(
stage
)
stage2
$
stage
<-
as.character
(
stage2
$
stage
)
stage2
<-
stage2
$
stage
# create singlecell dataset for comparison to bulk RNAseq
data
<-
GetAssayData
(
sample_noYS
,
assay
=
"RNA"
,
slot
=
"data"
)
data
<-
as.matrix
(
data
)
# run main SingleR function & add names for cluster
obj
<-
SingleR.CreateObject
(
data
,
myref
,
clusters
=
NULL
,
variable.genes
=
"de"
,
fine.tune
=
F
)
obj
[[
"names"
]]
<-
stage2
obj
[[
"names2"
]]
<-
labels
data
<-
t
(
obj
$
SingleR.single.main
$
r
)
cell_types
=
as.data.frame
(
obj
$
names
)
colnames
(
cell_types
)
=
'CellTypes'
rownames
(
cell_types
)
=
colnames
(
data
)
test
<-
as.data.frame
(
obj
$
names2
)
colnames
(
test
)
<-
"Samples"
rownames
(
test
)
<-
colnames
(
data
)
clusters
<-
cbind
(
cell_types
,
test
)
breaksList
=
seq
(
0
,
1
,
by
=
0.01
)
# export relative correlation scores and names for each cell in cs dataset
scores
=
obj
$
SingleR.single.main
$
r
datascores
<-
as.matrix
(
scores
)
scores
<-
data.frame
(
datascores
)
type
<-
data.frame
(
obj
$
names
)
cells
<-
data.frame
(
obj
$
names2
)
scores
$
type
<-
type
$
obj.names
scores
$
cells
<-
cells
$
obj.names2
#successive sorting orders cells in aesthetically-pleasing manner
for
(
k
in
unique
(
scores
$
cells
))
{
clu
<-
filter
(
scores
,
cells
==
k
)
clu
<-
clu
[
order
(
clu
$
CD184
),
]
clu
<-
clu
[
order
(
clu
$
CD32_plus
),
]
clu_cells
<-
rownames
(
clu
)
assign
(
paste0
(
"clu"
,
k
,
"_cells"
),
clu_cells
)
}
## reorder both clusters and HEC and AEC cells
agm_4
<-
filter
(
scores
,
cells
==
"AGM_4wk_658"
)
agm_4_hec
<-
filter
(
agm_4
,
type
==
"HEC"
)
agm_4_hec
<-
agm_4_hec
[
order
(
agm_4_hec
$
CD184
),]
agm_4_hec
<-
agm_4_hec
[
order
(
agm_4_hec
$
CD32_plus
),]
agm_4_aec
<-
filter
(
agm_4
,
type
==
"AEC"
)
agm_4_aec
<-
agm_4_aec
[
order
(
agm_4_aec
$
CD184
),]
agm_4_aec
<-
agm_4_aec
[
order
(
agm_4_aec
$
CD32_plus
),]
agm_5_1
<-
filter
(
scores
,
cells
==
"AGM_5wk_555"
)
agm_5_1_hec
<-
filter
(
agm_5_1
,
type
==
"HEC"
)
agm_5_1_hec
<-
agm_5_1_hec
[
order
(
agm_5_1_hec
$
CD184
),]
agm_5_1_hec
<-
agm_5_1_hec
[
order
(
agm_5_1_hec
$
CD32_plus
),]
agm_5_1_aec
<-
filter
(
agm_5_1
,
type
==
"AEC"
)
agm_5_1_aec
<-
agm_5_1_aec
[
order
(
agm_5_1_aec
$
CD184
),]
agm_5_1_aec
<-
agm_5_1_aec
[
order
(
agm_5_1_aec
$
CD32_plus
),]
agm_5_2
<-
filter
(
scores
,
cells
==
"AGM_5wk_575"
)
agm_5_2_hec
<-
filter
(
agm_5_2
,
type
==
"HEC"
)
agm_5_2_hec
<-
agm_5_2_hec
[
order
(
agm_5_2_hec
$
CD184
),]
agm_5_2_hec
<-
agm_5_2_hec
[
order
(
agm_5_2_hec
$
CD32_plus
),]
agm_5_2_aec
<-
filter
(
agm_5_2
,
type
==
"AEC"
)
agm_5_2_aec
<-
agm_5_2_aec
[
order
(
agm_5_2_aec
$
CD184
),]
agm_5_2_aec
<-
agm_5_2_aec
[
order
(
agm_5_2_aec
$
CD32_plus
),]
agm_6
<-
filter
(
scores
,
cells
==
"AGM_6wk_563"
)
agm_6_hec
<-
filter
(
agm_6
,
type
==
"HEC"
)
agm_6_hec
<-
agm_6_hec
[
order
(
agm_6_hec
$
CD184
),]
agm_6_hec
<-
agm_6_hec
[
order
(
agm_6_hec
$
CD32_plus
),]
agm_6_aec
<-
filter
(
agm_6
,
type
==
"AEC"
)
agm_6_aec
<-
agm_6_aec
[
order
(
agm_6_aec
$
CD184
),]
agm_6_aec
<-
agm_6_aec
[
order
(
agm_6_aec
$
CD32_plus
),]
agm_cs10
<-
filter
(
scores
,
cells
==
"AGM_CS10_Liu"
)
agm_cs10_hec
<-
filter
(
agm_cs10
,
type
==
"HEC"
)
agm_cs10_hec
<-
agm_cs10_hec
[
order
(
agm_cs10_hec
$
CD184
),]
agm_cs10_hec
<-
agm_cs10_hec
[
order
(
agm_cs10_hec
$
CD32_plus
),]
agm_cs10_aec
<-
filter
(
agm_cs10
,
type
==
"AEC"
)
agm_cs10_aec
<-
agm_cs10_aec
[
order
(
agm_cs10_aec
$
CD184
),]
agm_cs10_aec
<-
agm_cs10_aec
[
order
(
agm_cs10_aec
$
CD32_plus
),]
agm_cs11
<-
filter
(
scores
,
cells
==
"AGM_CS11_Liu"
)
agm_cs11_hec
<-
filter
(
agm_cs11
,
type
==
"HEC"
)
agm_cs11_hec
<-
agm_cs11_hec
[
order
(
agm_cs11_hec
$
CD184
),]
agm_cs11_hec
<-
agm_cs11_hec
[
order
(
agm_cs11_hec
$
CD32_plus
),]
agm_cs11_aec
<-
filter
(
agm_cs11
,
type
==
"AEC"
)
agm_cs11_aec
<-
agm_cs11_aec
[
order
(
agm_cs11_aec
$
CD184
),]
agm_cs11_aec
<-
agm_cs11_aec
[
order
(
agm_cs11_aec
$
CD32_plus
),]
agm_cs13
<-
filter
(
scores
,
cells
==
"AGM_CS13_Liu"
)
agm_cs13_hec
<-
filter
(
agm_cs13
,
type
==
"HEC"
)
agm_cs13_hec
<-
agm_cs13_hec
[
order
(
agm_cs13_hec
$
CD184
),]
agm_cs13_hec
<-
agm_cs13_hec
[
order
(
agm_cs13_hec
$
CD32_plus
),]
agm_cs13_aec
<-
filter
(
agm_cs13
,
type
==
"AEC"
)
agm_cs13_aec
<-
agm_cs13_aec
[
order
(
agm_cs13_aec
$
CD184
),]
agm_cs13_aec
<-
agm_cs13_aec
[
order
(
agm_cs13_aec
$
CD32_plus
),]
agm_4_hec.cells
<-
row.names
(
agm_4_hec
)
agm_4_aec.cells
<-
row.names
(
agm_4_aec
)
agm_5_1_hec.cells
<-
row.names
(
agm_5_1_hec
)
agm_5_1_aec.cells
<-
row.names
(
agm_5_1_aec
)
agm_5_2_hec.cells
<-
row.names
(
agm_5_2_hec
)
agm_5_2_aec.cells
<-
row.names
(
agm_5_2_aec
)
agm_6_hec.cells
<-
row.names
(
agm_6_hec
)
agm_6_aec.cells
<-
row.names
(
agm_6_aec
)
agm_cs10_hec.cells
<-
row.names
(
agm_cs10_hec
)
agm_cs10_aec.cells
<-
row.names
(
agm_cs10_aec
)
agm_cs11_hec.cells
<-
row.names
(
agm_cs11_hec
)
agm_cs11_aec.cells
<-
row.names
(
agm_cs11_aec
)
agm_cs13_hec.cells
<-
row.names
(
agm_cs13_hec
)
agm_cs13_aec.cells
<-
row.names
(
agm_cs13_aec
)
order
<-
c
(
agm_cs10_aec.cells
,
agm_cs11_aec.cells
,
agm_cs13_aec.cells
,
agm_4_aec.cells
,
agm_5_1_aec.cells
,
agm_5_2_aec.cells
,
agm_6_aec.cells
,
agm_cs10_hec.cells
,
agm_cs11_hec.cells
,
agm_cs13_hec.cells
,
agm_4_hec.cells
,
agm_5_1_hec.cells
,
agm_5_2_hec.cells
,
agm_6_hec.cells
)
data_reorder
<-
data
[,
match
(
order
,
colnames
(
data
))]
annot_colors
<-
list
(
CellTypes
=
c
(
HEC
=
"red"
,
AEC
=
"royalblue3"
),
Samples
=
c
(
AGM_CS10_Liu
=
"#00BFC4"
,
AGM_CS11_Liu
=
"#00A9FF"
,
AGM_CS13_Liu
=
"#C77CFF"
,
AGM_4wk_658
=
"#F8766D"
,
AGM_5wk_555
=
"#CD9600"
,
AGM_5wk_575
=
"#7CAE00"
,
AGM_6wk_563
=
"#00BE67"
))
annot
<-
clusters
colours
<-
annot_colors
colAnn
<-
HeatmapAnnotation
(
df
=
clusters
,
which
=
'col'
,
show_annotation_name
=
T
,
col
=
annot_colors
,
annotation_legend_param
=
list
(
CellTypes
=
list
(
title
=
"Cell Type"
,
title_gp
=
gpar
(
fontsize
=
14
*
96
/
72
,
fontface
=
"bold"
),
labels_gp
=
gpar
(
fontsize
=
14
*
96
/
72
),
grid_height
=
unit
(
0.7
,
"cm"
)),
Samples
=
list
(
title
=
"Samples"
,
title_gp
=
gpar
(
fontsize
=
14
*
96
/
72
,
fontface
=
"bold"
),
labels_gp
=
gpar
(
fontsize
=
14
*
96
/
72
),
grid_height
=
unit
(
0.7
,
"cm"
))),
annotation_width
=
unit
(
c
(
1
,
4
),
'cm'
),
gap
=
unit
(
1
,
'mm'
))
counts_scaled
<-
t
(
scale
(
t
(
data_reorder
)))
counts_scaled_ordered
<-
counts_scaled
[,
match
(
rownames
(
clusters
),
colnames
(
counts_scaled
))]
clusters
$
DF
<-
paste
(
clusters
$
CellTypes
,
clusters
$
Samples
,
sep
=
"_"
)
dd
<-
cluster_within_group
(
counts_scaled_ordered
,
clusters
$
DF
)
hmap
<-
Heatmap
(
counts_scaled_ordered
,
name
=
"Z-score"
,
show_column_names
=
F
,
show_column_dend
=
F
,
show_parent_dend_line
=
F
,
show_row_dend
=
F
,
cluster_rows
=
F
,
cluster_columns
=
dd
,
column_gap
=
unit
(
c
(
2
,
2
,
5
,
2
,
5
,
2
,
5
,
2
,
2
,
5
,
5
,
2
,
5
),
"mm"
),
top_annotation
=
colAnn
,
column_split
=
14
,
clustering_method_columns
=
"ward.D2"
,
heatmap_legend_param
=
list
(
title
=
"Z-Scores"
,
title_gp
=
gpar
(
fontsize
=
14
*
96
/
72
,
fontface
=
"bold"
),
labels_gp
=
gpar
(
fontsize
=
12
*
96
/
72
),
legend_height
=
unit
(
2
,
"cm"
)
),
width
=
unit
(
15
*
96
/
72
,
"cm"
),
height
=
unit
(
5
*
96
/
72
,
"cm"
),
use_raster
=
TRUE
,
# for raster quality, needs to be tested
raster_quality
=
5
)
draw
(
hmap
,
heatmap_legend_side
=
"right"
,
annotation_legend_side
=
"right"
)
# ED_Fig5F ----------------------------------------------------------------
## load scRNAseq data from public dataset GSE162950
## load scRNAseq data from public dataset GSE162950
load
(
"Scarfo_HEC2023/scRNAseq/seurat_object.Rdata"
)
load
(
"Scarfo_HEC2023/scRNAseq/seurat_object.Rdata"
)
EHT_scorecard
<-
c
(
"CXCR4"
,
"SOX17"
,
"GJA5"
,
"MECOM"
,
"HOXA9"
,
"SPN"
,
"CD44"
,
"ITGA2B"
,
"HLF"
,
"GFI1"
,
"MLLT3"
,
"KCNK17"
,
"MYB"
,
"STAT5A"
,
"SMIM24"
,
"RAB27B"
,
"SPINK2"
)
EHT_scorecard
<-
c
(
"CXCR4"
,
"SOX17"
,
"GJA5"
,
"MECOM"
,
"HOXA9"
,
"SPN"
,
"CD44"
,
"ITGA2B"
,
"HLF"
,
"GFI1"
,
"MLLT3"
,
"KCNK17"
,
"MYB"
,
"STAT5A"
,
"SMIM24"
,
"RAB27B"
,
"SPINK2"
)
...
@@ -32,110 +341,10 @@ DotPlot(sample_sub,
...
@@ -32,110 +341,10 @@ DotPlot(sample_sub,
dot.scale
=
10
)
+
dot.scale
=
10
)
+
coord_flip
()
coord_flip
()
# ED_Fig5C ------------------------------------------------------------------
## load Seurat data
sample
<-
readRDS
(
"Scarfo_HEC2023/scRNAseq/WNTd_HE.rds"
)
sample
@
meta.data
[[
"Cell.Annotations"
]]
<-
recode_factor
(
sample
@
meta.data
[[
"RNA_snn_res.0.6"
]],
"0"
=
"HECs"
,
"1"
=
"HECs"
,
"2"
=
"HECs"
,
"3"
=
"Venous cells"
,
"4"
=
"Venous cells"
,
"5"
=
"Arterial ECs"
,
"6"
=
"Arterial ECs"
,
"7"
=
"Venous cells"
,
"8"
=
"ECs (M phase)"
,
"9"
=
"Venous cells"
,
"10"
=
"EndoMT cells"
,
"11"
=
"HECs"
,
"12"
=
"Arterial ECs"
,
"13"
=
"Lymphatic ECs"
,
"14"
=
"Arterial ECs"
,
"15"
=
"Allantois/Placenta"
,
"16"
=
"HECs"
,
"17"
=
"HECs"
,
"18"
=
"EndoMT cells"
,
"19"
=
"SM cells"
,
"20"
=
"EndoMT cells"
,
"21"
=
"EndoMT cells"
)
DimPlot
(
object
=
sample
,
reduction
=
"umap"
,
group.by
=
"Cell.Annotations"
,
label
=
F
,
repel
=
T
,
pt.size
=
0.8
,
label.size
=
6
,
cols
=
c
(
'HECs'
=
"#377EB8"
,
'Lymphatic ECs'
=
"#4DAF4A"
,
'Arterial ECs'
=
"#E41A1C"
,
'Venous cells'
=
"#FF7F00"
,
'EndoMT cells'
=
"#E6AB02"
,
'Allantois/Placenta'
=
"#984EA3"
,
'SM cells'
=
"#A6761D"
,
"ECs (M phase)"
=
"#A50026"
))
# ED_Fig5D - ED_Fig5E ------------------------------------------------------------------
FeaturePlot
(
object
=
sample
,
features
=
c
(
"RUNX1"
,
"FCGR2B"
),
cols
=
c
(
"grey90"
,
"blue"
),
order
=
T
,
pt.size
=
0.1
)
# ED_Fig5F ------------------------------------------------------------------
runx1
<-
readRDS
(
"Scarfo_HEC2023/scRNAseq/RUNX1_clusters.rds"
)
## Monocle3
Idents
(
runx1
)
<-
"RNA_snn_res.0.6"
start
=
WhichCells
(
runx1
,
idents
=
"0"
)
expression_matrix
<-
runx1
@
assays
[[
"RNA"
]]
@
counts
cell_metadata
<-
runx1
@
meta.data
cell_metadata
$
orig.ident
<-
factor
(
cell_metadata
$
orig.ident
,
levels
=
unique
(
cell_metadata
$
orig.ident
))
gene_annotation
<-
data.frame
(
"gene_short_name"
=
rownames
(
runx1
))
rownames
(
gene_annotation
)
<-
gene_annotation
$
gene_short_name
cds
<-
new_cell_data_set
(
expression_data
=
expression_matrix
,
cell_metadata
=
cell_metadata
,
gene_metadata
=
gene_annotation
)
cds
<-
preprocess_cds
(
cds
,
num_dim
=
50
)
cds
<-
reduce_dimension
(
cds
)
## Construct and assign the made up partition
recreate.partition
<-
c
(
rep
(
1
,
length
(
cds
@
colData
@
rownames
)))
names
(
recreate.partition
)
<-
cds
@
colData
@
rownames
recreate.partition
<-
as.factor
(
recreate.partition
)
cds
@
clusters
@
listData
[[
"UMAP"
]][[
"partitions"
]]
<-
recreate.partition
## Assign the cluster info
list_cluster
<-
runx1
@
meta.data
[[
"RNA_snn_res.0.6"
]]
names
(
list_cluster
)
<-
runx1
@
assays
[[
"RNA"
]]
@
data
@
Dimnames
[[
2
]]
cds
@
clusters
@
listData
[[
"UMAP"
]][[
"clusters"
]]
<-
list_cluster
cds
@
clusters
@
listData
[[
"UMAP"
]][[
"louvain_res"
]]
<-
"NA"
## Assign UMAP coordinate
reducedDims
(
cds
)[[
"UMAP"
]]
<-
runx1
@
reductions
[[
"umap"
]]
@
cell.embeddings
### Reset colnames and rownames (consequence of UMAP replacement)
rownames
(
cds
@
principal_graph_aux
[[
'UMAP'
]]
$
dp_mst
)
<-
NULL
## Learn Graph
cds
<-
learn_graph
(
cds
=
cds
,
use_partition
=
T
,
learn_graph_control
=
list
(
ncenter
=
220
,
minimal_branch_len
=
15
),
verbose
=
T
)
cds
<-
order_cells
(
cds
,
root_cells
=
start
)
plot_cells
(
cds
,
color_cells_by
=
"pseudotime"
,
label_groups_by_cluster
=
FALSE
,
label_leaves
=
T
,
label_branch_points
=
T
,
group_label_size
=
8
,
graph_label_size
=
3
,
cell_size
=
1
,
trajectory_graph_segment_size
=
1
)
# ED_Fig5G ------------------------------------------------------------------
cds
<-
estimate_size_factors
(
cds
)
gi
<-
c
(
"H19"
,
"KCNK17"
,
"RUNX1"
,
"MYB"
,
"SPN"
)
cds_gi
<-
cds
[
rowData
(
cds
)
$
gene_short_name
%in%
gi
,]
plot_genes_in_pseudotime
(
cds_subset
=
cds_gi
,
ncol
=
3
,
color_cells_by
=
"RNA_snn_res.0.6"
,
min_expr
=
NULL
,
cell_size
=
1
)
scRNAseq/ED_Figure6_CellOracle.ipynb
0 → 100644
View file @
7dcd8057
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Import libraries"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import celloracle as co\n",
"from celloracle.applications import Pseudotime_calculator\n",
"from celloracle.applications import Gradient_calculator\n",
"from celloracle.applications import Oracle_development_module\n",
"import anndata2ri\n",
"import os\n",
"import sys\n",
"import time\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import scanpy as sc\n",
"import seaborn as sns\n",
"import copy\n",
"import glob\n",
"import time\n",
"import shutil\n",
"from tqdm.auto import tqdm"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plotting parameters settings"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# visualization settings\n",
"%config InlineBackend.figure_format = 'retina'\n",
"%matplotlib inline\n",
"plt.rcParams['figure.figsize'] = [6, 4.5]\n",
"plt.rcParams[\"savefig.dpi\"] = 300"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"save_folder = \"Scarfo_HEC2023/scRNAseq\"\n",
"os.makedirs(save_folder, exist_ok=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Seurat object in Python"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Activate the anndata2ri conversion between SingleCellExperiment and AnnData\n",
"anndata2ri.activate()\n",
"#Loading the rpy2 extension\n",
"%load_ext rpy2.ipython\n",
"sc.settings.verbosity = 3\n",
"sc.logging.print_versions()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%R\n",
"suppressPackageStartupMessages(library(Seurat))\n",
"runx1_obj <- readRDS(\"Scarfo_HEC2023/scRNAseq/RUNX1_clusters.rds\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%R -o runx1_obj_sce\n",
"#convert the Seurat object to a SingleCellExperiment object\n",
"runx1_obj_sce <- as.SingleCellExperiment(runx1_obj)\n",
"runx1_obj_sce"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"adata = runx1_obj_sce\n",
"adata"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(f\"Cell number is :{adata.shape[0]}\")\n",
"print(f\"Gene number is :{adata.shape[1]}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sc.pl.umap(adata, color='RNA_snn_new_res.0.6')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Constructing GRN"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Select top 3000 highly-variable genes\n",
"filter_result = sc.pp.filter_genes_dispersion(adata.X,flavor='cell_ranger',n_top_genes=3000,log=False)\n",
"# Subset the genes\n",
"adata = adata[:, filter_result.gene_subset]\n",
"print(f\"Cell number is :{adata.shape[0]}\")\n",
"print(f\"Gene number is :{adata.shape[1]}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load data from CellTalkDB\n",
"ligand_receptor = pd.read_csv(\"Scarfo_HEC2023/scRNAseq/CellTalkDB_modified.txt\", sep='\\t')\n",
"ligand_receptor.columns = ['Ligand','Receptor']\n",
"ligand_receptor\n",
"print('FCGR2B' in ligand_receptor['Ligand'].unique())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Make dictionary: dictionary key is Ligand and dictionary value is list of target genes.\n",
"LR_to_TG_dictionary = {}\n",
"for LR, TGs in zip(ligand_receptor.Ligand, ligand_receptor.Receptor):\n",
" TG_list = TGs.replace(\" \", \"\").split(\",\")\n",
" LR_to_TG_dictionary[LR] = TG_list\n",
"TG_to_LR_dictionary = co.utility.inverse_dictionary(LR_to_TG_dictionary)\n",
"TG_to_LR_dictionary"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Instantiate Oracle object\n",
"oracle = co.Oracle()\n",
"oracle.import_anndata_as_raw_count(adata=adata,cluster_column_name=\"RNA_snn_new_res.0.6\",embedding_name=\"X_umap\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Add LR information \n",
"oracle.addTFinfo_dictionary(TG_to_LR_dictionary)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Perform PCA\n",
"oracle.perform_PCA()\n",
"# Select important PCs\n",
"plt.plot(np.cumsum(oracle.pca.explained_variance_ratio_)[:100])\n",
"n_comps = np.where(np.diff(np.diff(np.cumsum(oracle.pca.explained_variance_ratio_))>0.002))[0][0]\n",
"plt.axvline(n_comps, c=\"k\")\n",
"plt.show()\n",
"print(n_comps)\n",
"n_comps = min(n_comps, 50)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Estimate the optimal number of nearest neighbors for KNN imputation\n",
"n_cell = oracle.adata.shape[0]\n",
"print(f\"cell number is :{n_cell}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"k = int(0.025*n_cell)\n",
"print(f\"Auto-selected k is :{k}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# perform KNN imputation\n",
"oracle.knn_imputation(n_pca_dims=n_comps,k=k, balanced=True,b_sight=k*8,b_maxl=k*4,n_jobs=4)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# GRN calculation\n",
"%time\n",
"links = oracle.get_links(cluster_name_for_GRN_unit=\"RNA_snn_new_res.0.6\", alpha=10,verbose_level=10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"links.links_dict.keys()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# filter weak edges\n",
"links.filter_links(p=0.001, weight=\"coef_abs\", threshold_number=2000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# network degree distribution\n",
"links.plot_degree_distributions(plot_model=True,\n",
" save=f\"{save_folder}/degree_distribution/\",)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Calculate network scores.\n",
"links.get_network_score()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Adding pseudotime to oracle object"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Instantiate pseudotime object using oracle object.\n",
"pt = Pseudotime_calculator(oracle_object=oracle)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"clusters_in_RUNX1_lineage = ['0','1','2','11','16','17']\n",
"# Make a dictionary\n",
"lineage_dictionary = {\"Lineage_RUNX1\": clusters_in_RUNX1_lineage}\n",
"# Input lineage information into pseudotime object\n",
"pt.set_lineage(lineage_dictionary=lineage_dictionary)\n",
"# Visualize lineage information\n",
"pt.plot_lineages()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# add root cell information\n",
"# plotly is required\n",
"try:\n",
" import plotly.express as px\n",
" def plot(adata, embedding_key, cluster_column_name):\n",
" embedding = adata.obsm[embedding_key]\n",
" df = pd.DataFrame(embedding, columns=[\"x\", \"y\"])\n",
" df[\"cluster\"] = adata.obs[cluster_column_name].values\n",
" df[\"label\"] = adata.obs.index.values\n",
" fig = px.scatter(df, x=\"x\", y=\"y\", hover_name=df[\"label\"], color=\"cluster\")\n",
" fig.show()\n",
" plot(adata=pt.adata,\n",
" embedding_key=pt.obsm_key,\n",
" cluster_column_name=pt.cluster_column_name)\n",
"except:\n",
" print(\"Plotly not found in your environment. Did you install plotly?\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Estimated root cell name for each lineage\n",
"root_cells = {\"Lineage_RUNX1\": \"Sample1_GGAGAACTCGCGGTAC-1\"}\n",
"pt.set_root_cells(root_cells=root_cells)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check diffusion map data.\n",
"\"X_diffmap\" in pt.adata.obsm"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# calculate diffusion map\n",
"sc.pp.neighbors(pt.adata, n_neighbors=30)\n",
"sc.tl.diffmap(pt.adata)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Calculate pseudotime\n",
"pt.get_pseudotime_per_each_lineage()\n",
"# Check results\n",
"pt.plot_pseudotime(cmap=\"rainbow\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Add calculated pseudotime data to the oracle object\n",
"oracle.adata.obs = pt.adata.obs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## In silico perturbation analysis"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# make pridictive models for simulation and fit the ridge regression models again\n",
"links.filter_links()\n",
"oracle.get_cluster_specific_TFdict_from_Links(links_object=links)\n",
"oracle.fit_GRN_for_simulation(alpha=10, use_cluster_specific_TFdict=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"goi = \"FCGR2B\"\n",
"# Enter perturbation conditions to simulate signal propagation after the perturbation.\n",
"oracle.simulate_shift(perturb_condition={goi: 0.0}, n_propagation=3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Get transition probability\n",
"oracle.estimate_transition_prob(n_neighbors=200,knn_random=True,sampled_fraction=1)\n",
"# Calculate embedding\n",
"oracle.calculate_embedding_shift(sigma_corr=0.05)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# quiver plots\n",
"fig, ax = plt.subplots(1, 2, figsize=[13, 6])\n",
"scale = 25\n",
"# Show quiver plot\n",
"oracle.plot_quiver(scale=scale, ax=ax[0])\n",
"ax[0].set_title(f\"Simulated cell identity shift vector: {goi} KO\")\n",
"# Show quiver plot that was calculated with randomized graph.\n",
"oracle.plot_quiver_random(scale=scale, ax=ax[1])\n",
"ax[1].set_title(f\"Randomized simulation vector\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# establishing digitalized grids\n",
"n_grid = 40\n",
"oracle.calculate_p_mass(smooth=0.8, n_grid=n_grid, n_neighbors=200)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Search for best min_mass.\n",
"oracle.suggest_mass_thresholds(n_suggestion=12)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"min_mass = 19\n",
"oracle.calculate_mass_filter(min_mass=min_mass, plot=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# shift vector plots\n",
"fig, ax = plt.subplots(1, 2, figsize=[13, 5])\n",
"scale_simulation = 2\n",
"# Show quiver plot\n",
"oracle.plot_simulation_flow_on_grid(scale=scale_simulation, ax=ax[0])\n",
"ax[0].set_title(f\"Simulated cell identity shift vector: {goi} KO\")\n",
"# Show quiver plot that was calculated with randomized graph.\n",
"oracle.plot_simulation_flow_random_on_grid(scale=scale_simulation, ax=ax[1])\n",
"ax[1].set_title(f\"Randomized simulation vector\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Plot vector field with cell cluster\n",
"fig, ax = plt.subplots(figsize=[8, 8])\n",
"oracle.plot_cluster_whole(ax=ax, s=10)\n",
"oracle.plot_simulation_flow_on_grid(scale=scale_simulation, ax=ax, show_background=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Visualize pseudotime\n",
"fig, ax = plt.subplots(figsize=[6,6])\n",
"sc.pl.embedding(adata=oracle.adata, basis=oracle.embedding_name, ax=ax, cmap=\"viridis\",\n",
" color=[\"Pseudotime\"], save=\"pseudotime.pdf\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Instantiate Gradient calculator object\n",
"gradient = Gradient_calculator(oracle_object=oracle, pseudotime_key=\"Pseudotime\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"gradient.calculate_p_mass(smooth=0.8, n_grid=n_grid, n_neighbors=200)\n",
"gradient.calculate_mass_filter(min_mass=min_mass, plot=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# convert pseudotime intop grid points\n",
"gradient.transfer_data_into_grid(args={\"method\": \"polynomial\", \"n_poly\":5}, plot=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Calculate graddient\n",
"gradient.calculate_gradient()\n",
"# Show results\n",
"scale_dev = 20\n",
"gradient.visualize_results(scale=scale_dev, s=5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Visualize results\n",
"fig, ax = plt.subplots(figsize=[6, 6])\n",
"gradient.plot_dev_flow_on_grid(scale=scale_dev, ax=ax)\n",
"ax.set_title(f\"Normal Developmental flow\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# quantitatively compare the directionality and size of vectors between perturbation simulation and natural differentiation using inner product\n",
"# Make Oracle_development_module to compare two vector field\n",
"dev = Oracle_development_module()\n",
"# Load development flow\n",
"dev.load_differentiation_reference_data(gradient_object=gradient)\n",
"# Load simulation result\n",
"dev.load_perturb_simulation_data(oracle_object=oracle)\n",
"# Calculate inner produc scores\n",
"dev.calculate_inner_product()\n",
"dev.calculate_digitized_ip(n_bins=10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Show perturbation scores\n",
"vm = 0.2\n",
"fig, ax = plt.subplots(1, 2, figsize=[12, 5])\n",
"dev.plot_inner_product_on_grid(vm=0.02, s=50, ax=ax[0])\n",
"ax[0].set_title(f\"PS calculated with FCGR2B KO\")\n",
"dev.plot_inner_product_random_on_grid(vm=vm, s=50, ax=ax[1])\n",
"ax[1].set_title(f\"PS calculated with Randomized simulation vector\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Show perturbation scores with perturbation simulation vector field\n",
"fig, ax = plt.subplots(figsize=[6, 6])\n",
"dev.plot_inner_product_on_grid(vm=0.05, s=50, ax=ax)\n",
"dev.plot_simulation_flow_on_grid(scale=scale_simulation, show_background=False, ax=ax)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# summary plots\n",
"# visualize the results\n",
"dev.visualize_development_module_layout_0(s=5, scale_for_simulation=scale_simulation, s_grid=50, scale_for_pseudotime=scale_dev, vm=0.05)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:postsc]",
"language": "python",
"name": "conda-env-postsc-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
scRNAseq/ED_Figure6_plots.R
0 → 100644
View file @
7dcd8057
suppressPackageStartupMessages
(
library
(
Seurat
))
suppressPackageStartupMessages
(
library
(
monocle3
))
suppressPackageStartupMessages
(
library
(
ggplot2
))
suppressPackageStartupMessages
(
library
(
dplyr
))
suppressPackageStartupMessages
(
library
(
dyno
))
suppressPackageStartupMessages
(
library
(
tidyverse
))
suppressPackageStartupMessages
(
library
(
magrittr
))
suppressPackageStartupMessages
(
library
(
patchwork
))
# ED_Fig6A ------------------------------------------------------------------
## load Seurat data
sample
<-
readRDS
(
"Scarfo_HEC2023/scRNAseq/WNTd_HE.rds"
)
sample
@
meta.data
[[
"Cell.Annotations"
]]
<-
recode_factor
(
sample
@
meta.data
[[
"RNA_snn_res.0.6"
]],
"0"
=
"HECs"
,
"1"
=
"HECs"
,
"2"
=
"HECs"
,
"3"
=
"Venous cells"
,
"4"
=
"Venous cells"
,
"5"
=
"Arterial ECs"
,
"6"
=
"Arterial ECs"
,
"7"
=
"Venous cells"
,
"8"
=
"ECs (M phase)"
,
"9"
=
"Venous cells"
,
"10"
=
"EndoMT cells"
,
"11"
=
"HECs"
,
"12"
=
"Arterial ECs"
,
"13"
=
"Lymphatic ECs"
,
"14"
=
"Arterial ECs"
,
"15"
=
"Allantois/Placenta"
,
"16"
=
"HECs"
,
"17"
=
"HECs"
,
"18"
=
"EndoMT cells"
,
"19"
=
"SM cells"
,
"20"
=
"EndoMT cells"
,
"21"
=
"EndoMT cells"
)
DimPlot
(
object
=
sample
,
reduction
=
"umap"
,
group.by
=
"Cell.Annotations"
,
label
=
F
,
repel
=
T
,
pt.size
=
0.8
,
label.size
=
6
,
cols
=
c
(
'HECs'
=
"#377EB8"
,
'Lymphatic ECs'
=
"#4DAF4A"
,
'Arterial ECs'
=
"#E41A1C"
,
'Venous cells'
=
"#FF7F00"
,
'EndoMT cells'
=
"#E6AB02"
,
'Allantois/Placenta'
=
"#984EA3"
,
'SM cells'
=
"#A6761D"
,
"ECs (M phase)"
=
"#A50026"
))
# ED_Fig6B - ED_Fig6C ------------------------------------------------------------------
FeaturePlot
(
object
=
sample
,
features
=
c
(
"RUNX1"
,
"FCGR2B"
),
cols
=
c
(
"grey90"
,
"blue"
),
order
=
T
,
pt.size
=
0.1
)
# ED_Fig6D ------------------------------------------------------------------
runx1
<-
readRDS
(
"Scarfo_HEC2023/scRNAseq/RUNX1_clusters.rds"
)
## Monocle3
Idents
(
runx1
)
<-
"RNA_snn_res.0.6"
start
=
WhichCells
(
runx1
,
idents
=
"0"
)
expression_matrix
<-
runx1
@
assays
[[
"RNA"
]]
@
counts
cell_metadata
<-
runx1
@
meta.data
cell_metadata
$
orig.ident
<-
factor
(
cell_metadata
$
orig.ident
,
levels
=
unique
(
cell_metadata
$
orig.ident
))
gene_annotation
<-
data.frame
(
"gene_short_name"
=
rownames
(
runx1
))
rownames
(
gene_annotation
)
<-
gene_annotation
$
gene_short_name
cds
<-
new_cell_data_set
(
expression_data
=
expression_matrix
,
cell_metadata
=
cell_metadata
,
gene_metadata
=
gene_annotation
)
cds
<-
preprocess_cds
(
cds
,
num_dim
=
50
)
cds
<-
reduce_dimension
(
cds
)
## Construct and assign the made up partition
recreate.partition
<-
c
(
rep
(
1
,
length
(
cds
@
colData
@
rownames
)))
names
(
recreate.partition
)
<-
cds
@
colData
@
rownames
recreate.partition
<-
as.factor
(
recreate.partition
)
cds
@
clusters
@
listData
[[
"UMAP"
]][[
"partitions"
]]
<-
recreate.partition
## Assign the cluster info
list_cluster
<-
runx1
@
meta.data
[[
"RNA_snn_res.0.6"
]]
names
(
list_cluster
)
<-
runx1
@
assays
[[
"RNA"
]]
@
data
@
Dimnames
[[
2
]]
cds
@
clusters
@
listData
[[
"UMAP"
]][[
"clusters"
]]
<-
list_cluster
cds
@
clusters
@
listData
[[
"UMAP"
]][[
"louvain_res"
]]
<-
"NA"
## Assign UMAP coordinate
reducedDims
(
cds
)[[
"UMAP"
]]
<-
runx1
@
reductions
[[
"umap"
]]
@
cell.embeddings
### Reset colnames and rownames (consequence of UMAP replacement)
rownames
(
cds
@
principal_graph_aux
[[
'UMAP'
]]
$
dp_mst
)
<-
NULL
## Learn Graph
cds
<-
learn_graph
(
cds
=
cds
,
use_partition
=
T
,
learn_graph_control
=
list
(
ncenter
=
220
,
minimal_branch_len
=
15
),
verbose
=
T
)
cds
<-
order_cells
(
cds
,
root_cells
=
start
)
plot_cells
(
cds
,
color_cells_by
=
"pseudotime"
,
label_groups_by_cluster
=
FALSE
,
label_leaves
=
T
,
label_branch_points
=
T
,
group_label_size
=
8
,
graph_label_size
=
3
,
cell_size
=
1
,
trajectory_graph_segment_size
=
1
)
# ED_Fig6E ------------------------------------------------------------------
cds
<-
estimate_size_factors
(
cds
)
gi
<-
c
(
"H19"
,
"KCNK17"
,
"RUNX1"
,
"MYB"
,
"SPN"
)
cds_gi
<-
cds
[
rowData
(
cds
)
$
gene_short_name
%in%
gi
,]
plot_genes_in_pseudotime
(
cds_subset
=
cds_gi
,
ncol
=
3
,
color_cells_by
=
"RNA_snn_res.0.6"
,
min_expr
=
NULL
,
cell_size
=
1
)
# ED_Fig6F - ED_Fig6G ------------------------------------------------------------------
## load Seurat data
sample
<-
readRDS
(
"Scarfo_HEC2023/scRNAseq/WNTd_HE.rds"
)
runx1
<-
subset
(
sample
,
idents
=
c
(
"0"
,
"1"
,
"2"
,
"11"
,
"16"
,
"17"
))
Idents
(
runx1
)
<-
"RNA_snn_new_res.0.6"
# preparing data for dyno
df
<-
as.matrix
(
runx1
[[
"RNA"
]]
@
data
)
counts
<-
Matrix
::
t
(
as
(
as.matrix
(
runx1
@
assays
$
RNA
@
counts
),
'sparseMatrix'
))
expression
<-
Matrix
::
t
(
as
(
as.matrix
(
runx1
@
assays
$
RNA
@
data
),
'sparseMatrix'
))
dataset_runx1
<-
wrap_expression
(
expression
=
expression
,
counts
=
counts
)
dataset_runx1
<-
dataset_runx1
%>%
add_prior_information
(
start_id
=
"Sample1_ATCTCTATCCAATCCC-1"
,
# from cluster 0
start_n
=
0
)
%>%
add_grouping
(
grouping
=
runx1
$
RNA_snn_new_res.0.6
)
%>%
add_dimred
(
Embeddings
(
runx1
,
"umap"
))
# selecting method
guidelines
<-
guidelines_shiny
(
dataset_runx1
)
methods_selected
<-
guidelines
$
methods_selected
methods_selected
# running top model
model_paga_t
<-
infer_trajectory
(
dataset_runx1
,
method
=
ti_paga_tree
(),
verbose
=
T
,
give_priors
=
c
(
"start_id"
,
"start_n"
))
# plotting trajectory and pseudotime scores
patchwork
::
wrap_plots
(
plot_dimred
(
model_paga_t
,
size_cells
=
0.5
,
grouping
=
dataset_runx1
$
grouping
)
+
ggtitle
(
" RUNX1 Clusters"
)
+
scale_color_manual
(
values
=
c
(
"#F8766D"
,
"#B79F00"
,
"#00BFC4"
,
"#619CFF"
,
"#F564E3"
,
"#00BA38"
))
+
labs
(
color
=
"Clusters"
)
+
guides
(
color
=
guide_legend
(
override.aes
=
list
(
size
=
2
))),
plot_dimred
(
model_paga_t
,
size_cells
=
0.5
,
"pseudotime"
,
pseudotime
=
calculate_pseudotime
(
model_paga_t
))
+
ggtitle
(
"Pseudotime"
)
)
scRNAseq/Fig
5
_plots.R
→
scRNAseq/Fig
4
_plots.R
View file @
7dcd8057
...
@@ -9,7 +9,7 @@ suppressPackageStartupMessages(library(clusterProfiler))
...
@@ -9,7 +9,7 @@ suppressPackageStartupMessages(library(clusterProfiler))
## load Seurat data
## load Seurat data
sample
<-
readRDS
(
"Scarfo_HEC2023/scRNAseq/WNTd_HE.rds"
)
sample
<-
readRDS
(
"Scarfo_HEC2023/scRNAseq/WNTd_HE.rds"
)
# Fig
5
A ------------------------------------------------------------------
# Fig
4
A ------------------------------------------------------------------
DimPlot
(
object
=
sample
,
DimPlot
(
object
=
sample
,
reduction
=
"umap"
,
reduction
=
"umap"
,
...
@@ -72,7 +72,7 @@ DimPlot(object = runx1,
...
@@ -72,7 +72,7 @@ DimPlot(object = runx1,
label.size
=
8
)
label.size
=
8
)
# Fig
5
B ----------------------------------------------------------------
# Fig
4
B ----------------------------------------------------------------
## Monocle3
## Monocle3
Idents
(
runx1
)
<-
"RNA_snn_res.0.6"
Idents
(
runx1
)
<-
"RNA_snn_res.0.6"
...
@@ -114,7 +114,7 @@ plot_cells(cds,
...
@@ -114,7 +114,7 @@ plot_cells(cds,
cell_size
=
1
)
cell_size
=
1
)
# Fig
5
C -------------------------------------------------------------------
# Fig
4
C -------------------------------------------------------------------
## differential markers
## differential markers
degs_clu11_vs_clu0_1_2
<-
FindMarkers
(
object
=
runx1
,
ident.1
=
11
,
ident.2
=
c
(
0
,
1
,
2
),
only.pos
=
FALSE
,
min.pct
=
0
,
test.use
=
"wilcox"
,
logfc.threshold
=
0
,
min.cells.group
=
0
)
degs_clu11_vs_clu0_1_2
<-
FindMarkers
(
object
=
runx1
,
ident.1
=
11
,
ident.2
=
c
(
0
,
1
,
2
),
only.pos
=
FALSE
,
min.pct
=
0
,
test.use
=
"wilcox"
,
logfc.threshold
=
0
,
min.cells.group
=
0
)
...
@@ -171,7 +171,7 @@ for (i in comp) {
...
@@ -171,7 +171,7 @@ for (i in comp) {
assign
(
paste0
(
"p_"
,
i
),
p
)
assign
(
paste0
(
"p_"
,
i
),
p
)
}
}
# Fig
5
D -------------------------------------------------------------------
# Fig
4
D -------------------------------------------------------------------
runx1
@
meta.data
[[
"Phase"
]]
<-
recode_factor
(
runx1
@
meta.data
[[
"Phase"
]],
runx1
@
meta.data
[[
"Phase"
]]
<-
recode_factor
(
runx1
@
meta.data
[[
"Phase"
]],
"G1"
=
"G1"
,
"G1"
=
"G1"
,
...
@@ -243,7 +243,7 @@ for (i in cc.clusters) {
...
@@ -243,7 +243,7 @@ for (i in cc.clusters) {
}
}
p_final
<-
cc_umap
/
(
`p_clu_0,1,2`
|
p_clu_11
|
`p_clu_16,17`
)
p_final
<-
cc_umap
/
(
`p_clu_0,1,2`
|
p_clu_11
|
`p_clu_16,17`
)
# Fig
5
E -------------------------------------------------------------------
# Fig
4
E -------------------------------------------------------------------
cds
<-
order_cells
(
cds
,
root_cells
=
start
)
cds
<-
order_cells
(
cds
,
root_cells
=
start
)
cds
<-
estimate_size_factors
(
cds
)
cds
<-
estimate_size_factors
(
cds
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment