revisions_update_v1

7dcd8057 · Monah Abou Alezz · 839b34cc · 7dcd8057 · 7dcd8057 · 7dcd8057
Commit 7dcd8057 authored Dec 12, 2023 by Monah Abou Alezz
--- a/BulkRNAseq_2/ED_Figure5_plots.R
+++ b/BulkRNAseq_2/ED_Figure5_plots.R
@@ -73,6 +73,7 @@ DGE.results.sig <- subset(DGE.results.sorted,
                          FDR < adj.pval.thr & abs(logFC) > logfc.thr)
 DGEgenes <- rownames(DGE.results.sig)
 # ED_Fig5A -----------------------------------------------------------------
 ## scorecard

--- a/BulkRNAseq_2/featureCounts_results_tpm.txt
+++ b/BulkRNAseq_2/featureCounts_results_tpm.txt
--- a/scRNAseq/.ipynb_checkpoints/CellOracle-checkpoint.ipynb
+++ b/scRNAseq/.ipynb_checkpoints/CellOracle-checkpoint.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Import libraries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import celloracle as co\n",
+    "from celloracle.applications import Pseudotime_calculator\n",
+    "from celloracle.applications import Gradient_calculator\n",
+    "from celloracle.applications import Oracle_development_module\n",
+    "import anndata2ri\n",
+    "import os\n",
+    "import sys\n",
+    "import time\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import scanpy as sc\n",
+    "import seaborn as sns\n",
+    "import copy\n",
+    "import glob\n",
+    "import time\n",
+    "import shutil\n",
+    "from tqdm.auto import tqdm"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Plotting parameters settings"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# visualization settings\n",
+    "%config InlineBackend.figure_format = 'retina'\n",
+    "%matplotlib inline\n",
+    "plt.rcParams['figure.figsize'] = [6, 4.5]\n",
+    "plt.rcParams[\"savefig.dpi\"] = 300"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "save_folder = \"Scarfo_HEC2023/scRNAseq\"\n",
+    "os.makedirs(save_folder, exist_ok=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load Seurat object in Python"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Activate the anndata2ri conversion between SingleCellExperiment and AnnData\n",
+    "anndata2ri.activate()\n",
+    "#Loading the rpy2 extension\n",
+    "%load_ext rpy2.ipython\n",
+    "sc.settings.verbosity = 3\n",
+    "sc.logging.print_versions()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%R\n",
+    "suppressPackageStartupMessages(library(Seurat))\n",
+    "runx1_obj <- readRDS(\"Scarfo_HEC2023/scRNAseq/RUNX1_clusters.rds\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%R -o runx1_obj_sce\n",
+    "#convert the Seurat object to a SingleCellExperiment object\n",
+    "runx1_obj_sce <- as.SingleCellExperiment(runx1_obj)\n",
+    "runx1_obj_sce"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "adata = runx1_obj_sce\n",
+    "adata"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f\"Cell number is :{adata.shape[0]}\")\n",
+    "print(f\"Gene number is :{adata.shape[1]}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sc.pl.umap(adata, color='RNA_snn_new_res.0.6')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Constructing GRN"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Select top 3000 highly-variable genes\n",
+    "filter_result = sc.pp.filter_genes_dispersion(adata.X,flavor='cell_ranger',n_top_genes=3000,log=False)\n",
+    "# Subset the genes\n",
+    "adata = adata[:, filter_result.gene_subset]\n",
+    "print(f\"Cell number is :{adata.shape[0]}\")\n",
+    "print(f\"Gene number is :{adata.shape[1]}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load data from CellTalkDB\n",
+    "ligand_receptor = pd.read_csv(\"Scarfo_HEC2023/scRNAseq/CellTalkDB_modified.txt\", sep='\\t')\n",
+    "ligand_receptor.columns = ['Ligand','Receptor']\n",
+    "ligand_receptor\n",
+    "print('FCGR2B' in ligand_receptor['Ligand'].unique())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Make dictionary: dictionary key is Ligand and dictionary value is list of target genes.\n",
+    "LR_to_TG_dictionary = {}\n",
+    "for LR, TGs in zip(ligand_receptor.Ligand, ligand_receptor.Receptor):\n",
+    "    TG_list = TGs.replace(\" \", \"\").split(\",\")\n",
+    "    LR_to_TG_dictionary[LR] = TG_list\n",
+    "TG_to_LR_dictionary = co.utility.inverse_dictionary(LR_to_TG_dictionary)\n",
+    "TG_to_LR_dictionary"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Instantiate Oracle object\n",
+    "oracle = co.Oracle()\n",
+    "oracle.import_anndata_as_raw_count(adata=adata,cluster_column_name=\"RNA_snn_new_res.0.6\",embedding_name=\"X_umap\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Add LR information \n",
+    "oracle.addTFinfo_dictionary(TG_to_LR_dictionary)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Perform PCA\n",
+    "oracle.perform_PCA()\n",
+    "# Select important PCs\n",
+    "plt.plot(np.cumsum(oracle.pca.explained_variance_ratio_)[:100])\n",
+    "n_comps = np.where(np.diff(np.diff(np.cumsum(oracle.pca.explained_variance_ratio_))>0.002))[0][0]\n",
+    "plt.axvline(n_comps, c=\"k\")\n",
+    "plt.show()\n",
+    "print(n_comps)\n",
+    "n_comps = min(n_comps, 50)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Estimate the optimal number of nearest neighbors for KNN imputation\n",
+    "n_cell = oracle.adata.shape[0]\n",
+    "print(f\"cell number is :{n_cell}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "k = int(0.025*n_cell)\n",
+    "print(f\"Auto-selected k is :{k}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# perform KNN imputation\n",
+    "oracle.knn_imputation(n_pca_dims=n_comps,k=k, balanced=True,b_sight=k*8,b_maxl=k*4,n_jobs=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# GRN calculation\n",
+    "%time\n",
+    "links = oracle.get_links(cluster_name_for_GRN_unit=\"RNA_snn_new_res.0.6\", alpha=10,verbose_level=10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "links.links_dict.keys()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# filter weak edges\n",
+    "links.filter_links(p=0.001, weight=\"coef_abs\", threshold_number=2000)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# network degree distribution\n",
+    "links.plot_degree_distributions(plot_model=True,\n",
+    "                                save=f\"{save_folder}/degree_distribution/\",)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Calculate network scores.\n",
+    "links.get_network_score()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Adding pseudotime to oracle object"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Instantiate pseudotime object using oracle object.\n",
+    "pt = Pseudotime_calculator(oracle_object=oracle)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "clusters_in_RUNX1_lineage = ['0','1','2','11','16','17']\n",
+    "# Make a dictionary\n",
+    "lineage_dictionary = {\"Lineage_RUNX1\": clusters_in_RUNX1_lineage}\n",
+    "# Input lineage information into pseudotime object\n",
+    "pt.set_lineage(lineage_dictionary=lineage_dictionary)\n",
+    "# Visualize lineage information\n",
+    "pt.plot_lineages()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# add root cell information\n",
+    "# plotly is required\n",
+    "try:\n",
+    "    import plotly.express as px\n",
+    "    def plot(adata, embedding_key, cluster_column_name):\n",
+    "        embedding = adata.obsm[embedding_key]\n",
+    "        df = pd.DataFrame(embedding, columns=[\"x\", \"y\"])\n",
+    "        df[\"cluster\"] = adata.obs[cluster_column_name].values\n",
+    "        df[\"label\"] = adata.obs.index.values\n",
+    "        fig = px.scatter(df, x=\"x\", y=\"y\", hover_name=df[\"label\"], color=\"cluster\")\n",
+    "        fig.show()\n",
+    "    plot(adata=pt.adata,\n",
+    "         embedding_key=pt.obsm_key,\n",
+    "         cluster_column_name=pt.cluster_column_name)\n",
+    "except:\n",
+    "    print(\"Plotly not found in your environment. Did you install plotly?\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Estimated root cell name for each lineage\n",
+    "root_cells = {\"Lineage_RUNX1\": \"Sample1_GGAGAACTCGCGGTAC-1\"}\n",
+    "pt.set_root_cells(root_cells=root_cells)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Check diffusion map data.\n",
+    "\"X_diffmap\" in pt.adata.obsm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# calculate diffusion map\n",
+    "sc.pp.neighbors(pt.adata, n_neighbors=30)\n",
+    "sc.tl.diffmap(pt.adata)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Calculate pseudotime\n",
+    "pt.get_pseudotime_per_each_lineage()\n",
+    "# Check results\n",
+    "pt.plot_pseudotime(cmap=\"rainbow\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Add calculated pseudotime data to the oracle object\n",
+    "oracle.adata.obs = pt.adata.obs"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## In silico perturbation analysis"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# make pridictive models for simulation and fit the ridge regression models again\n",
+    "links.filter_links()\n",
+    "oracle.get_cluster_specific_TFdict_from_Links(links_object=links)\n",
+    "oracle.fit_GRN_for_simulation(alpha=10, use_cluster_specific_TFdict=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "goi = \"FCGR2B\"\n",
+    "# Enter perturbation conditions to simulate signal propagation after the perturbation.\n",
+    "oracle.simulate_shift(perturb_condition={goi: 0.0}, n_propagation=3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get transition probability\n",
+    "oracle.estimate_transition_prob(n_neighbors=200,knn_random=True,sampled_fraction=1)\n",
+    "# Calculate embedding\n",
+    "oracle.calculate_embedding_shift(sigma_corr=0.05)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# quiver plots\n",
+    "fig, ax = plt.subplots(1, 2,  figsize=[13, 6])\n",
+    "scale = 25\n",
+    "# Show quiver plot\n",
+    "oracle.plot_quiver(scale=scale, ax=ax[0])\n",
+    "ax[0].set_title(f\"Simulated cell identity shift vector: {goi} KO\")\n",
+    "# Show quiver plot that was calculated with randomized graph.\n",
+    "oracle.plot_quiver_random(scale=scale, ax=ax[1])\n",
+    "ax[1].set_title(f\"Randomized simulation vector\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# establishing digitalized grids\n",
+    "n_grid = 40\n",
+    "oracle.calculate_p_mass(smooth=0.8, n_grid=n_grid, n_neighbors=200)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Search for best min_mass.\n",
+    "oracle.suggest_mass_thresholds(n_suggestion=12)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "min_mass = 19\n",
+    "oracle.calculate_mass_filter(min_mass=min_mass, plot=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# shift vector plots\n",
+    "fig, ax = plt.subplots(1, 2,  figsize=[13, 5])\n",
+    "scale_simulation = 2\n",
+    "# Show quiver plot\n",
+    "oracle.plot_simulation_flow_on_grid(scale=scale_simulation, ax=ax[0])\n",
+    "ax[0].set_title(f\"Simulated cell identity shift vector: {goi} KO\")\n",
+    "# Show quiver plot that was calculated with randomized graph.\n",
+    "oracle.plot_simulation_flow_random_on_grid(scale=scale_simulation, ax=ax[1])\n",
+    "ax[1].set_title(f\"Randomized simulation vector\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Plot vector field with cell cluster\n",
+    "fig, ax = plt.subplots(figsize=[8, 8])\n",
+    "oracle.plot_cluster_whole(ax=ax, s=10)\n",
+    "oracle.plot_simulation_flow_on_grid(scale=scale_simulation, ax=ax, show_background=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Visualize pseudotime\n",
+    "fig, ax = plt.subplots(figsize=[6,6])\n",
+    "sc.pl.embedding(adata=oracle.adata, basis=oracle.embedding_name, ax=ax, cmap=\"viridis\",\n",
+    "                color=[\"Pseudotime\"], save=\"pseudotime.pdf\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Instantiate Gradient calculator object\n",
+    "gradient = Gradient_calculator(oracle_object=oracle, pseudotime_key=\"Pseudotime\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "gradient.calculate_p_mass(smooth=0.8, n_grid=n_grid, n_neighbors=200)\n",
+    "gradient.calculate_mass_filter(min_mass=min_mass, plot=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# convert pseudotime intop grid points\n",
+    "gradient.transfer_data_into_grid(args={\"method\": \"polynomial\", \"n_poly\":5}, plot=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Calculate graddient\n",
+    "gradient.calculate_gradient()\n",
+    "# Show results\n",
+    "scale_dev = 20\n",
+    "gradient.visualize_results(scale=scale_dev, s=5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Visualize results\n",
+    "fig, ax = plt.subplots(figsize=[6, 6])\n",
+    "gradient.plot_dev_flow_on_grid(scale=scale_dev, ax=ax)\n",
+    "ax.set_title(f\"Normal Developmental flow\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# quantitatively compare the directionality and size of vectors between perturbation simulation and natural differentiation using inner product\n",
+    "# Make Oracle_development_module to compare two vector field\n",
+    "dev = Oracle_development_module()\n",
+    "# Load development flow\n",
+    "dev.load_differentiation_reference_data(gradient_object=gradient)\n",
+    "# Load simulation result\n",
+    "dev.load_perturb_simulation_data(oracle_object=oracle)\n",
+    "# Calculate inner produc scores\n",
+    "dev.calculate_inner_product()\n",
+    "dev.calculate_digitized_ip(n_bins=10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Show perturbation scores\n",
+    "vm = 0.2\n",
+    "fig, ax = plt.subplots(1, 2, figsize=[12, 5])\n",
+    "dev.plot_inner_product_on_grid(vm=0.02, s=50, ax=ax[0])\n",
+    "ax[0].set_title(f\"PS calculated with FCGR2B KO\")\n",
+    "dev.plot_inner_product_random_on_grid(vm=vm, s=50, ax=ax[1])\n",
+    "ax[1].set_title(f\"PS calculated with Randomized simulation vector\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Show perturbation scores with perturbation simulation vector field\n",
+    "fig, ax = plt.subplots(figsize=[6, 6])\n",
+    "dev.plot_inner_product_on_grid(vm=0.05, s=50, ax=ax)\n",
+    "dev.plot_simulation_flow_on_grid(scale=scale_simulation, show_background=False, ax=ax)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# summary plots\n",
+    "# visualize the results\n",
+    "dev.visualize_development_module_layout_0(s=5, scale_for_simulation=scale_simulation, s_grid=50, scale_for_pseudotime=scale_dev, vm=0.05)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python [conda env:postsc]",
+   "language": "python",
+   "name": "conda-env-postsc-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/scRNAseq/CellTalkDB_modified.txt
+++ b/scRNAseq/CellTalkDB_modified.txt
--- a/scRNAseq/ED_Figure5_plots.R
+++ b/scRNAseq/ED_Figure5_plots.R
@@ -2,9 +2,318 @@ suppressPackageStartupMessages(library(Seurat))
 suppressPackageStartupMessages(library(monocle3))
 suppressPackageStartupMessages(library(ggplot2))
 suppressPackageStartupMessages(library(dplyr))
+suppressPackageStartupMessages(library(ComplexHeatmap))
 # ED_Fig5B ----------------------------------------------------------------
+## load scRNAseq data from public dataset GSE162950
+load("Scarfo_HEC2023/scRNAseq/seurat_object.Rdata")
+sample <- SetIdent(sample, value = sample@meta.data$seurat_clusters)
+DimPlot(sample, label=TRUE, group.by = "seurat_clusters")
+sample@meta.data[["orig.ident"]] <- factor(sample@meta.data[["orig.ident"]], 
+                                           levels = c("AGM_CS10_Liu",
+                                                      "AGM_CS11_Liu",
+                                                      "AGM_CS13_Liu",
+                                                      "AGM_4wk_658",
+                                                      "AGM_5wk_555",
+                                                      "AGM_5wk_575",
+                                                      "AGM_6wk_563",
+                                                      "YolkSac_CS11_Liu"))
+DimPlot(sample, 
+        label=F,
+        repel = T,
+        reduction="umap", 
+        group.by="orig.ident", 
+        pt.size=0.1,
+        cols = c("AGM_CS10_Liu" = "#00BFC4",
+                 "AGM_CS11_Liu" = "#00A9FF",
+                 "AGM_CS13_Liu" = "#C77CFF",
+                 "AGM_4wk_658" = "#F8766D",
+                 "AGM_5wk_555" = "#CD9600",
+                 "AGM_5wk_575" = "#7CAE00",
+                 "AGM_6wk_563" = "#00BE67",
+                 "YolkSac_CS11_Liu" = "#FF61CC"))
+# ED_Fig5C ----------------------------------------------------------------
+pal <- viridis(n = 10, option = "D")
+FeaturePlot(sample,
+            features = c("HOXA9", "HOXA10"),
+            ncol = 2,
+            repel = T,
+            pt.size = 0.05,
+            order = T,
+            cols = pal)
+# ED_Fig5E ----------------------------------------------------------------
+install_github("sturgeonlab/Luff-etal-2021/SingleR/versioncontrolscripts")
+library(versioncontrolscripts)
+## load reference dataset
+cd32_dll4 <- read.table("Scarfo_HEC2023/BulkRNAseq_2/featureCounts_results_tpm.txt", 
+                        header=T, sep="\t")
+## load Seurat data
+load("Scarfo_HEC2023/scRNAseq/seurat_object.Rdata")
+sample <- SetIdent(sample, value = sample@meta.data$seurat_clusters)
+sample_noYS <- subset(sample, orig.ident != "YolkSac_CS11_Liu")
+sample_noYS@meta.data[["orig.ident"]] <-
+  factor(
+    sample_noYS@meta.data[["orig.ident"]],
+    levels = c(
+      "AGM_CS10_Liu",
+      "AGM_CS11_Liu",
+      "AGM_CS13_Liu",
+      "AGM_4wk_658",
+      "AGM_5wk_555",
+      "AGM_5wk_575",
+      "AGM_6wk_563"
+    )
+  )
+## defining HEC and AEC
+aec <- WhichCells(sample_noYS, 
+                 expression = CDH5 > 0.2 & 
+                   CXCR4 > 0.3 & 
+                   GJA5 > 0.2 & 
+                   DLL4 > 0.1 & 
+                   PTPRC < 0.1 & 
+                   SPN < 0.05 & 
+                   HEY2 > 0.15)
+hec <- WhichCells(sample_noYS, 
+                  expression =  RUNX1 > 0.1 & 
+                    CDH5 > 0.2 & 
+                    HOXA9 > 0.05 & 
+                    PTPRC < 0.1 & 
+                    ITGA2B < 0.1 & 
+                    SPN < 0.05)
+Idents(sample_noYS, cells=aec) = "AEC"
+Idents(sample_noYS, cells=hec) = "HEC"
+sample_noYS$CellType <- Idents(sample_noYS)
+sample_noYS <- subset(sample_noYS, CellType=="AEC" | CellType=="HEC")
+Idents(sample_noYS) <- "CellType"
+## setting up reference dataset
+bulk <- cd32_dll4[,-c(2:6)]
+bulk <- bulk[,c(1,2,3,4,6,7,9)]
+colnames(bulk) <- c("genes",
+                    "RS227_CD184","RS227_CD32_plus",
+                    "RS242_CD184","RS242_CD32_plus",
+                    "RS243_CD184","RS243_CD32_plus")
+bulk$CD184 <- rowMeans(bulk[,c(2,4,6)])
+bulk$CD32_plus <- rowMeans(bulk[,c(3,5,7)])
+bulk <- bulk[,c(1,9,8)]
+types = list("CD32_plus","CD184")
+main_types = list("CD32_plus","CD184")
+x = bulk[,1]
+loss = bulk[,-1]
+rownames(loss) = make.names(x, unique = TRUE)
+bulk <- loss
+bulk = as.matrix(bulk)
+types = as.character(types)
+main_types = as.character(main_types)
+myref = list(data = bulk, main_types = main_types, types = types)
+myref[["de.genes"]] = CreateVariableGeneSet(bulk,types,200)
+myref[["de.genes.main"]] = CreateVariableGeneSet(bulk,main_types,300)
+labels <- sample_noYS@meta.data$orig.ident
+stage <- sample_noYS@meta.data$CellType
+stage2 <- data.frame(stage)
+stage2$stage <- as.character(stage2$stage)
+stage2 <- stage2$stage
+# create singlecell dataset for comparison to bulk RNAseq
+data <- GetAssayData(sample_noYS, assay = "RNA", slot = "data")
+data <- as.matrix(data)
+# run main SingleR function & add names for cluster
+obj <- SingleR.CreateObject(data, 
+                            myref, 
+                            clusters = NULL, 
+                            variable.genes = "de", 
+                            fine.tune = F)
+obj[["names"]] <- stage2
+obj[["names2"]] <- labels
+data <- t(obj$SingleR.single.main$r)
+cell_types = as.data.frame(obj$names)
+colnames(cell_types) = 'CellTypes'
+rownames(cell_types) = colnames(data)
+test <- as.data.frame(obj$names2)
+colnames(test) <- "Samples"
+rownames(test) <- colnames(data)
+clusters <- cbind(cell_types, test)
+breaksList = seq(0, 1, by = 0.01)
+# export relative correlation scores and names for each cell in cs dataset
+scores = obj$SingleR.single.main$r
+datascores <- as.matrix(scores)
+scores <- data.frame(datascores)
+type <- data.frame(obj$names)
+cells <- data.frame(obj$names2)
+scores$type <- type$obj.names
+scores$cells <- cells$obj.names2
+#successive sorting orders cells in aesthetically-pleasing manner
+for (k in unique(scores$cells)) {
+  clu <- filter(scores, cells == k)
+  clu <- clu[order(clu$CD184), ]
+  clu <- clu[order(clu$CD32_plus), ]
+  clu_cells <- rownames(clu)
+  assign(paste0("clu",k,"_cells"),clu_cells)
+}
+## reorder both clusters and HEC and AEC cells
+agm_4 <- filter(scores, cells == "AGM_4wk_658")
+agm_4_hec <- filter(agm_4, type == "HEC")
+agm_4_hec <- agm_4_hec[order(agm_4_hec$CD184),]
+agm_4_hec <- agm_4_hec[order(agm_4_hec$CD32_plus),]
+agm_4_aec <- filter(agm_4, type == "AEC")
+agm_4_aec <- agm_4_aec[order(agm_4_aec$CD184),]
+agm_4_aec <- agm_4_aec[order(agm_4_aec$CD32_plus),]
+agm_5_1 <- filter(scores, cells == "AGM_5wk_555")
+agm_5_1_hec <- filter(agm_5_1, type == "HEC")
+agm_5_1_hec <- agm_5_1_hec[order(agm_5_1_hec$CD184),]
+agm_5_1_hec <- agm_5_1_hec[order(agm_5_1_hec$CD32_plus),]
+agm_5_1_aec <- filter(agm_5_1, type == "AEC")
+agm_5_1_aec <- agm_5_1_aec[order(agm_5_1_aec$CD184),]
+agm_5_1_aec <- agm_5_1_aec[order(agm_5_1_aec$CD32_plus),]
+agm_5_2 <- filter(scores, cells == "AGM_5wk_575")
+agm_5_2_hec <- filter(agm_5_2, type == "HEC")
+agm_5_2_hec <- agm_5_2_hec[order(agm_5_2_hec$CD184),]
+agm_5_2_hec <- agm_5_2_hec[order(agm_5_2_hec$CD32_plus),]
+agm_5_2_aec <- filter(agm_5_2, type == "AEC")
+agm_5_2_aec <- agm_5_2_aec[order(agm_5_2_aec$CD184),]
+agm_5_2_aec <- agm_5_2_aec[order(agm_5_2_aec$CD32_plus),]
+agm_6 <- filter(scores, cells == "AGM_6wk_563")
+agm_6_hec <- filter(agm_6, type == "HEC")
+agm_6_hec <- agm_6_hec[order(agm_6_hec$CD184),]
+agm_6_hec <- agm_6_hec[order(agm_6_hec$CD32_plus),]
+agm_6_aec <- filter(agm_6, type == "AEC")
+agm_6_aec <- agm_6_aec[order(agm_6_aec$CD184),]
+agm_6_aec <- agm_6_aec[order(agm_6_aec$CD32_plus),]
+agm_cs10 <- filter(scores, cells == "AGM_CS10_Liu")
+agm_cs10_hec <- filter(agm_cs10, type == "HEC")
+agm_cs10_hec <- agm_cs10_hec[order(agm_cs10_hec$CD184),]
+agm_cs10_hec <- agm_cs10_hec[order(agm_cs10_hec$CD32_plus),]
+agm_cs10_aec <- filter(agm_cs10, type == "AEC")
+agm_cs10_aec <- agm_cs10_aec[order(agm_cs10_aec$CD184),]
+agm_cs10_aec <- agm_cs10_aec[order(agm_cs10_aec$CD32_plus),]
+agm_cs11 <- filter(scores, cells == "AGM_CS11_Liu")
+agm_cs11_hec <- filter(agm_cs11, type == "HEC")
+agm_cs11_hec <- agm_cs11_hec[order(agm_cs11_hec$CD184),]
+agm_cs11_hec <- agm_cs11_hec[order(agm_cs11_hec$CD32_plus),]
+agm_cs11_aec <- filter(agm_cs11, type == "AEC")
+agm_cs11_aec <- agm_cs11_aec[order(agm_cs11_aec$CD184),]
+agm_cs11_aec <- agm_cs11_aec[order(agm_cs11_aec$CD32_plus),]
+agm_cs13 <- filter(scores, cells == "AGM_CS13_Liu")
+agm_cs13_hec <- filter(agm_cs13, type == "HEC")
+agm_cs13_hec <- agm_cs13_hec[order(agm_cs13_hec$CD184),]
+agm_cs13_hec <- agm_cs13_hec[order(agm_cs13_hec$CD32_plus),]
+agm_cs13_aec <- filter(agm_cs13, type == "AEC")
+agm_cs13_aec <- agm_cs13_aec[order(agm_cs13_aec$CD184),]
+agm_cs13_aec <- agm_cs13_aec[order(agm_cs13_aec$CD32_plus),]
+agm_4_hec.cells <- row.names(agm_4_hec)
+agm_4_aec.cells <- row.names(agm_4_aec)
+agm_5_1_hec.cells <- row.names(agm_5_1_hec)
+agm_5_1_aec.cells <- row.names(agm_5_1_aec)
+agm_5_2_hec.cells <- row.names(agm_5_2_hec)
+agm_5_2_aec.cells <- row.names(agm_5_2_aec)
+agm_6_hec.cells <- row.names(agm_6_hec)
+agm_6_aec.cells <- row.names(agm_6_aec)
+agm_cs10_hec.cells <- row.names(agm_cs10_hec)
+agm_cs10_aec.cells <- row.names(agm_cs10_aec)
+agm_cs11_hec.cells <- row.names(agm_cs11_hec)
+agm_cs11_aec.cells <- row.names(agm_cs11_aec)
+agm_cs13_hec.cells <- row.names(agm_cs13_hec)
+agm_cs13_aec.cells <- row.names(agm_cs13_aec)
+order <- c(
+  agm_cs10_aec.cells,
+  agm_cs11_aec.cells,
+  agm_cs13_aec.cells,
+  agm_4_aec.cells,
+  agm_5_1_aec.cells,
+  agm_5_2_aec.cells,
+  agm_6_aec.cells,
+  agm_cs10_hec.cells,
+  agm_cs11_hec.cells,
+  agm_cs13_hec.cells,
+  agm_4_hec.cells,
+  agm_5_1_hec.cells,
+  agm_5_2_hec.cells,
+  agm_6_hec.cells
+)
+data_reorder <- data[,match(order, colnames(data))]
+annot_colors <- list(CellTypes=c(HEC = "red",
+                                 AEC = "royalblue3"),
+                     Samples = c(AGM_CS10_Liu = "#00BFC4",
+                                 AGM_CS11_Liu = "#00A9FF",
+                                 AGM_CS13_Liu = "#C77CFF",
+                                 AGM_4wk_658 = "#F8766D",
+                                 AGM_5wk_555 = "#CD9600",
+                                 AGM_5wk_575 = "#7CAE00",
+                                 AGM_6wk_563 = "#00BE67"))
+annot <- clusters
+colours <- annot_colors
+colAnn <- HeatmapAnnotation(df = clusters,
+                            which = 'col',
+                            show_annotation_name = T,
+                            col = annot_colors,
+                            annotation_legend_param = list(
+                              CellTypes = list(
+                                title = "Cell Type",
+                                title_gp = gpar(fontsize = 14*96/72,
+                                                fontface = "bold"), 
+                                labels_gp = gpar(fontsize = 14*96/72),
+                                grid_height = unit(0.7, "cm")),
+                              Samples = list(
+                                title = "Samples",
+                                title_gp = gpar(fontsize = 14*96/72,
+                                                fontface = "bold"), 
+                                labels_gp = gpar(fontsize = 14*96/72),
+                                grid_height = unit(0.7, "cm"))),
+                            annotation_width = unit(c(1, 4), 'cm'),
+                            gap = unit(1, 'mm'))
+counts_scaled <- t(scale(t(data_reorder)))
+counts_scaled_ordered <- counts_scaled[, match(rownames(clusters),colnames(counts_scaled))]
+clusters$DF <- paste(clusters$CellTypes, clusters$Samples, sep = "_")
+dd <- cluster_within_group(counts_scaled_ordered, clusters$DF)
+hmap <- Heatmap(counts_scaled_ordered,
+                name = "Z-score",
+                show_column_names = F,
+                show_column_dend = F,
+                show_parent_dend_line = F,
+                show_row_dend = F,
+                cluster_rows = F,
+                cluster_columns = dd,
+                column_gap = unit(c(2,2,5,2,5,2,5,2,
+                                    2,5,5,2,5), "mm"),
+                top_annotation=colAnn,
+                column_split = 14,
+                clustering_method_columns = "ward.D2",
+                heatmap_legend_param = list(
+                  title = "Z-Scores",
+                  title_gp = gpar(fontsize = 14*96/72,
+                                  fontface = "bold"), 
+                  labels_gp = gpar(fontsize = 12*96/72),
+                  legend_height = unit(2, "cm")
+                ),
+                width = unit(15*96/72, "cm"), 
+                height = unit(5*96/72, "cm"),
+                use_raster = TRUE, # for raster quality, needs to be tested
+                raster_quality = 5)
+draw(hmap, heatmap_legend_side="right", annotation_legend_side="right")
+# ED_Fig5F ----------------------------------------------------------------
 ## load scRNAseq data from public dataset GSE162950
 load("Scarfo_HEC2023/scRNAseq/seurat_object.Rdata")
 EHT_scorecard <- c("CXCR4","SOX17","GJA5","MECOM","HOXA9","SPN","CD44","ITGA2B","HLF","GFI1","MLLT3","KCNK17","MYB","STAT5A","SMIM24","RAB27B","SPINK2")
@@ -32,110 +341,10 @@ DotPlot(sample_sub,
        dot.scale = 10) +
  coord_flip()
-# ED_Fig5C ------------------------------------------------------------------
-## load Seurat data
-sample <- readRDS("Scarfo_HEC2023/scRNAseq/WNTd_HE.rds")
-sample@meta.data[["Cell.Annotations"]] <- recode_factor(sample@meta.data[["RNA_snn_res.0.6"]],
-                                                           "0" = "HECs", 
-                                                           "1" = "HECs", 
-                                                           "2" = "HECs", 
-                                                           "3" = "Venous cells", 
-                                                           "4" = "Venous cells", 
-                                                           "5" = "Arterial ECs", 
-                                                           "6" = "Arterial ECs", 
-                                                           "7" = "Venous cells", 
-                                                           "8" = "ECs (M phase)",
-                                                           "9" = "Venous cells",
-                                                           "10" = "EndoMT cells",
-                                                           "11" = "HECs",
-                                                           "12" = "Arterial ECs",
-                                                           "13" = "Lymphatic ECs",
-                                                           "14" = "Arterial ECs",
-                                                           "15" = "Allantois/Placenta",
-                                                           "16" = "HECs",
-                                                           "17" = "HECs",
-                                                           "18" = "EndoMT cells",
-                                                           "19" = "SM cells",
-                                                           "20" = "EndoMT cells",
-                                                           "21" = "EndoMT cells")
-DimPlot(object = sample,
-        reduction = "umap",
-        group.by = "Cell.Annotations",
-        label = F,
-        repel = T,
-        pt.size = 0.8,
-        label.size = 6,
-        cols = c('HECs' = "#377EB8", 
-                 'Lymphatic ECs' = "#4DAF4A",
-                 'Arterial ECs' = "#E41A1C",
-                 'Venous cells' = "#FF7F00",
-                 'EndoMT cells' = "#E6AB02",
-                 'Allantois/Placenta' = "#984EA3",
-                 'SM cells' = "#A6761D",
-                 "ECs (M phase)" = "#A50026"))
-# ED_Fig5D - ED_Fig5E ------------------------------------------------------------------
-FeaturePlot(object = sample,
-            features = c("RUNX1", "FCGR2B"),
-            cols = c("grey90","blue"),
-            order = T,
-            pt.size = 0.1)
-# ED_Fig5F ------------------------------------------------------------------
-runx1 <- readRDS("Scarfo_HEC2023/scRNAseq/RUNX1_clusters.rds")
-## Monocle3
-Idents(runx1) <- "RNA_snn_res.0.6"
-start = WhichCells(runx1,idents = "0")
-expression_matrix <- runx1@assays[["RNA"]]@counts
-cell_metadata <- runx1@meta.data
-cell_metadata$orig.ident <- factor(cell_metadata$orig.ident, levels = unique(cell_metadata$orig.ident))
-gene_annotation <- data.frame("gene_short_name" = rownames(runx1))
-rownames(gene_annotation) <- gene_annotation$gene_short_name
-cds <- new_cell_data_set(expression_data = expression_matrix, 
-                         cell_metadata = cell_metadata, 
-                         gene_metadata = gene_annotation)
-cds <- preprocess_cds(cds, num_dim = 50)
-cds <- reduce_dimension(cds)
-## Construct and assign the made up partition
-recreate.partition <- c(rep(1, length(cds@colData@rownames)))
-names(recreate.partition) <- cds@colData@rownames
-recreate.partition <- as.factor(recreate.partition)
-cds@clusters@listData[["UMAP"]][["partitions"]] <- recreate.partition
-## Assign the cluster info
-list_cluster <- runx1@meta.data[["RNA_snn_res.0.6"]]
-names(list_cluster) <- runx1@assays[["RNA"]]@data@Dimnames[[2]]
-cds@clusters@listData[["UMAP"]][["clusters"]] <- list_cluster
-cds@clusters@listData[["UMAP"]][["louvain_res"]] <- "NA"
-## Assign UMAP coordinate
-reducedDims(cds)[["UMAP"]] <- runx1@reductions[["umap"]]@cell.embeddings
-### Reset colnames and rownames (consequence of UMAP replacement)
-rownames(cds@principal_graph_aux[['UMAP']]$dp_mst) <- NULL
-## Learn Graph
-cds <- learn_graph(cds = cds,use_partition = T,learn_graph_control=list(ncenter=220,minimal_branch_len=15),verbose = T)
-cds <- order_cells(cds, root_cells = start)
-plot_cells(cds,
-           color_cells_by = "pseudotime",
-           label_groups_by_cluster=FALSE,
-           label_leaves=T,
-           label_branch_points=T,
-           group_label_size = 8,
-           graph_label_size = 3,
-           cell_size = 1,
-           trajectory_graph_segment_size = 1)
-# ED_Fig5G ------------------------------------------------------------------
-cds <- estimate_size_factors(cds)
-gi <- c("H19","KCNK17","RUNX1", "MYB", "SPN")
-cds_gi <- cds[rowData(cds)$gene_short_name %in% gi,]
-plot_genes_in_pseudotime(cds_subset = cds_gi, 
-                         ncol = 3,
-                         color_cells_by = "RNA_snn_res.0.6", 
-                         min_expr = NULL,
-                         cell_size = 1)
--- a/scRNAseq/ED_Figure6_CellOracle.ipynb
+++ b/scRNAseq/ED_Figure6_CellOracle.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Import libraries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import celloracle as co\n",
+    "from celloracle.applications import Pseudotime_calculator\n",
+    "from celloracle.applications import Gradient_calculator\n",
+    "from celloracle.applications import Oracle_development_module\n",
+    "import anndata2ri\n",
+    "import os\n",
+    "import sys\n",
+    "import time\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import scanpy as sc\n",
+    "import seaborn as sns\n",
+    "import copy\n",
+    "import glob\n",
+    "import time\n",
+    "import shutil\n",
+    "from tqdm.auto import tqdm"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Plotting parameters settings"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# visualization settings\n",
+    "%config InlineBackend.figure_format = 'retina'\n",
+    "%matplotlib inline\n",
+    "plt.rcParams['figure.figsize'] = [6, 4.5]\n",
+    "plt.rcParams[\"savefig.dpi\"] = 300"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "save_folder = \"Scarfo_HEC2023/scRNAseq\"\n",
+    "os.makedirs(save_folder, exist_ok=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load Seurat object in Python"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Activate the anndata2ri conversion between SingleCellExperiment and AnnData\n",
+    "anndata2ri.activate()\n",
+    "#Loading the rpy2 extension\n",
+    "%load_ext rpy2.ipython\n",
+    "sc.settings.verbosity = 3\n",
+    "sc.logging.print_versions()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%R\n",
+    "suppressPackageStartupMessages(library(Seurat))\n",
+    "runx1_obj <- readRDS(\"Scarfo_HEC2023/scRNAseq/RUNX1_clusters.rds\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%R -o runx1_obj_sce\n",
+    "#convert the Seurat object to a SingleCellExperiment object\n",
+    "runx1_obj_sce <- as.SingleCellExperiment(runx1_obj)\n",
+    "runx1_obj_sce"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "adata = runx1_obj_sce\n",
+    "adata"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f\"Cell number is :{adata.shape[0]}\")\n",
+    "print(f\"Gene number is :{adata.shape[1]}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sc.pl.umap(adata, color='RNA_snn_new_res.0.6')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Constructing GRN"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Select top 3000 highly-variable genes\n",
+    "filter_result = sc.pp.filter_genes_dispersion(adata.X,flavor='cell_ranger',n_top_genes=3000,log=False)\n",
+    "# Subset the genes\n",
+    "adata = adata[:, filter_result.gene_subset]\n",
+    "print(f\"Cell number is :{adata.shape[0]}\")\n",
+    "print(f\"Gene number is :{adata.shape[1]}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load data from CellTalkDB\n",
+    "ligand_receptor = pd.read_csv(\"Scarfo_HEC2023/scRNAseq/CellTalkDB_modified.txt\", sep='\\t')\n",
+    "ligand_receptor.columns = ['Ligand','Receptor']\n",
+    "ligand_receptor\n",
+    "print('FCGR2B' in ligand_receptor['Ligand'].unique())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Make dictionary: dictionary key is Ligand and dictionary value is list of target genes.\n",
+    "LR_to_TG_dictionary = {}\n",
+    "for LR, TGs in zip(ligand_receptor.Ligand, ligand_receptor.Receptor):\n",
+    "    TG_list = TGs.replace(\" \", \"\").split(\",\")\n",
+    "    LR_to_TG_dictionary[LR] = TG_list\n",
+    "TG_to_LR_dictionary = co.utility.inverse_dictionary(LR_to_TG_dictionary)\n",
+    "TG_to_LR_dictionary"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Instantiate Oracle object\n",
+    "oracle = co.Oracle()\n",
+    "oracle.import_anndata_as_raw_count(adata=adata,cluster_column_name=\"RNA_snn_new_res.0.6\",embedding_name=\"X_umap\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Add LR information \n",
+    "oracle.addTFinfo_dictionary(TG_to_LR_dictionary)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Perform PCA\n",
+    "oracle.perform_PCA()\n",
+    "# Select important PCs\n",
+    "plt.plot(np.cumsum(oracle.pca.explained_variance_ratio_)[:100])\n",
+    "n_comps = np.where(np.diff(np.diff(np.cumsum(oracle.pca.explained_variance_ratio_))>0.002))[0][0]\n",
+    "plt.axvline(n_comps, c=\"k\")\n",
+    "plt.show()\n",
+    "print(n_comps)\n",
+    "n_comps = min(n_comps, 50)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Estimate the optimal number of nearest neighbors for KNN imputation\n",
+    "n_cell = oracle.adata.shape[0]\n",
+    "print(f\"cell number is :{n_cell}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "k = int(0.025*n_cell)\n",
+    "print(f\"Auto-selected k is :{k}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# perform KNN imputation\n",
+    "oracle.knn_imputation(n_pca_dims=n_comps,k=k, balanced=True,b_sight=k*8,b_maxl=k*4,n_jobs=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# GRN calculation\n",
+    "%time\n",
+    "links = oracle.get_links(cluster_name_for_GRN_unit=\"RNA_snn_new_res.0.6\", alpha=10,verbose_level=10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "links.links_dict.keys()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# filter weak edges\n",
+    "links.filter_links(p=0.001, weight=\"coef_abs\", threshold_number=2000)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# network degree distribution\n",
+    "links.plot_degree_distributions(plot_model=True,\n",
+    "                                save=f\"{save_folder}/degree_distribution/\",)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Calculate network scores.\n",
+    "links.get_network_score()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Adding pseudotime to oracle object"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Instantiate pseudotime object using oracle object.\n",
+    "pt = Pseudotime_calculator(oracle_object=oracle)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "clusters_in_RUNX1_lineage = ['0','1','2','11','16','17']\n",
+    "# Make a dictionary\n",
+    "lineage_dictionary = {\"Lineage_RUNX1\": clusters_in_RUNX1_lineage}\n",
+    "# Input lineage information into pseudotime object\n",
+    "pt.set_lineage(lineage_dictionary=lineage_dictionary)\n",
+    "# Visualize lineage information\n",
+    "pt.plot_lineages()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# add root cell information\n",
+    "# plotly is required\n",
+    "try:\n",
+    "    import plotly.express as px\n",
+    "    def plot(adata, embedding_key, cluster_column_name):\n",
+    "        embedding = adata.obsm[embedding_key]\n",
+    "        df = pd.DataFrame(embedding, columns=[\"x\", \"y\"])\n",
+    "        df[\"cluster\"] = adata.obs[cluster_column_name].values\n",
+    "        df[\"label\"] = adata.obs.index.values\n",
+    "        fig = px.scatter(df, x=\"x\", y=\"y\", hover_name=df[\"label\"], color=\"cluster\")\n",
+    "        fig.show()\n",
+    "    plot(adata=pt.adata,\n",
+    "         embedding_key=pt.obsm_key,\n",
+    "         cluster_column_name=pt.cluster_column_name)\n",
+    "except:\n",
+    "    print(\"Plotly not found in your environment. Did you install plotly?\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Estimated root cell name for each lineage\n",
+    "root_cells = {\"Lineage_RUNX1\": \"Sample1_GGAGAACTCGCGGTAC-1\"}\n",
+    "pt.set_root_cells(root_cells=root_cells)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Check diffusion map data.\n",
+    "\"X_diffmap\" in pt.adata.obsm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# calculate diffusion map\n",
+    "sc.pp.neighbors(pt.adata, n_neighbors=30)\n",
+    "sc.tl.diffmap(pt.adata)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Calculate pseudotime\n",
+    "pt.get_pseudotime_per_each_lineage()\n",
+    "# Check results\n",
+    "pt.plot_pseudotime(cmap=\"rainbow\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Add calculated pseudotime data to the oracle object\n",
+    "oracle.adata.obs = pt.adata.obs"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## In silico perturbation analysis"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# make pridictive models for simulation and fit the ridge regression models again\n",
+    "links.filter_links()\n",
+    "oracle.get_cluster_specific_TFdict_from_Links(links_object=links)\n",
+    "oracle.fit_GRN_for_simulation(alpha=10, use_cluster_specific_TFdict=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "goi = \"FCGR2B\"\n",
+    "# Enter perturbation conditions to simulate signal propagation after the perturbation.\n",
+    "oracle.simulate_shift(perturb_condition={goi: 0.0}, n_propagation=3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get transition probability\n",
+    "oracle.estimate_transition_prob(n_neighbors=200,knn_random=True,sampled_fraction=1)\n",
+    "# Calculate embedding\n",
+    "oracle.calculate_embedding_shift(sigma_corr=0.05)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# quiver plots\n",
+    "fig, ax = plt.subplots(1, 2,  figsize=[13, 6])\n",
+    "scale = 25\n",
+    "# Show quiver plot\n",
+    "oracle.plot_quiver(scale=scale, ax=ax[0])\n",
+    "ax[0].set_title(f\"Simulated cell identity shift vector: {goi} KO\")\n",
+    "# Show quiver plot that was calculated with randomized graph.\n",
+    "oracle.plot_quiver_random(scale=scale, ax=ax[1])\n",
+    "ax[1].set_title(f\"Randomized simulation vector\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# establishing digitalized grids\n",
+    "n_grid = 40\n",
+    "oracle.calculate_p_mass(smooth=0.8, n_grid=n_grid, n_neighbors=200)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Search for best min_mass.\n",
+    "oracle.suggest_mass_thresholds(n_suggestion=12)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "min_mass = 19\n",
+    "oracle.calculate_mass_filter(min_mass=min_mass, plot=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# shift vector plots\n",
+    "fig, ax = plt.subplots(1, 2,  figsize=[13, 5])\n",
+    "scale_simulation = 2\n",
+    "# Show quiver plot\n",
+    "oracle.plot_simulation_flow_on_grid(scale=scale_simulation, ax=ax[0])\n",
+    "ax[0].set_title(f\"Simulated cell identity shift vector: {goi} KO\")\n",
+    "# Show quiver plot that was calculated with randomized graph.\n",
+    "oracle.plot_simulation_flow_random_on_grid(scale=scale_simulation, ax=ax[1])\n",
+    "ax[1].set_title(f\"Randomized simulation vector\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Plot vector field with cell cluster\n",
+    "fig, ax = plt.subplots(figsize=[8, 8])\n",
+    "oracle.plot_cluster_whole(ax=ax, s=10)\n",
+    "oracle.plot_simulation_flow_on_grid(scale=scale_simulation, ax=ax, show_background=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Visualize pseudotime\n",
+    "fig, ax = plt.subplots(figsize=[6,6])\n",
+    "sc.pl.embedding(adata=oracle.adata, basis=oracle.embedding_name, ax=ax, cmap=\"viridis\",\n",
+    "                color=[\"Pseudotime\"], save=\"pseudotime.pdf\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Instantiate Gradient calculator object\n",
+    "gradient = Gradient_calculator(oracle_object=oracle, pseudotime_key=\"Pseudotime\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "gradient.calculate_p_mass(smooth=0.8, n_grid=n_grid, n_neighbors=200)\n",
+    "gradient.calculate_mass_filter(min_mass=min_mass, plot=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# convert pseudotime intop grid points\n",
+    "gradient.transfer_data_into_grid(args={\"method\": \"polynomial\", \"n_poly\":5}, plot=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Calculate graddient\n",
+    "gradient.calculate_gradient()\n",
+    "# Show results\n",
+    "scale_dev = 20\n",
+    "gradient.visualize_results(scale=scale_dev, s=5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Visualize results\n",
+    "fig, ax = plt.subplots(figsize=[6, 6])\n",
+    "gradient.plot_dev_flow_on_grid(scale=scale_dev, ax=ax)\n",
+    "ax.set_title(f\"Normal Developmental flow\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# quantitatively compare the directionality and size of vectors between perturbation simulation and natural differentiation using inner product\n",
+    "# Make Oracle_development_module to compare two vector field\n",
+    "dev = Oracle_development_module()\n",
+    "# Load development flow\n",
+    "dev.load_differentiation_reference_data(gradient_object=gradient)\n",
+    "# Load simulation result\n",
+    "dev.load_perturb_simulation_data(oracle_object=oracle)\n",
+    "# Calculate inner produc scores\n",
+    "dev.calculate_inner_product()\n",
+    "dev.calculate_digitized_ip(n_bins=10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Show perturbation scores\n",
+    "vm = 0.2\n",
+    "fig, ax = plt.subplots(1, 2, figsize=[12, 5])\n",
+    "dev.plot_inner_product_on_grid(vm=0.02, s=50, ax=ax[0])\n",
+    "ax[0].set_title(f\"PS calculated with FCGR2B KO\")\n",
+    "dev.plot_inner_product_random_on_grid(vm=vm, s=50, ax=ax[1])\n",
+    "ax[1].set_title(f\"PS calculated with Randomized simulation vector\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Show perturbation scores with perturbation simulation vector field\n",
+    "fig, ax = plt.subplots(figsize=[6, 6])\n",
+    "dev.plot_inner_product_on_grid(vm=0.05, s=50, ax=ax)\n",
+    "dev.plot_simulation_flow_on_grid(scale=scale_simulation, show_background=False, ax=ax)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# summary plots\n",
+    "# visualize the results\n",
+    "dev.visualize_development_module_layout_0(s=5, scale_for_simulation=scale_simulation, s_grid=50, scale_for_pseudotime=scale_dev, vm=0.05)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python [conda env:postsc]",
+   "language": "python",
+   "name": "conda-env-postsc-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/scRNAseq/ED_Figure6_plots.R
+++ b/scRNAseq/ED_Figure6_plots.R
+suppressPackageStartupMessages(library(Seurat))
+suppressPackageStartupMessages(library(monocle3))
+suppressPackageStartupMessages(library(ggplot2))
+suppressPackageStartupMessages(library(dplyr))
+suppressPackageStartupMessages(library(dyno))
+suppressPackageStartupMessages(library(tidyverse))
+suppressPackageStartupMessages(library(magrittr))
+suppressPackageStartupMessages(library(patchwork))
+# ED_Fig6A ------------------------------------------------------------------
+## load Seurat data
+sample <- readRDS("Scarfo_HEC2023/scRNAseq/WNTd_HE.rds")
+sample@meta.data[["Cell.Annotations"]] <- recode_factor(sample@meta.data[["RNA_snn_res.0.6"]],
+                                                        "0" = "HECs", 
+                                                        "1" = "HECs", 
+                                                        "2" = "HECs", 
+                                                        "3" = "Venous cells", 
+                                                        "4" = "Venous cells", 
+                                                        "5" = "Arterial ECs", 
+                                                        "6" = "Arterial ECs", 
+                                                        "7" = "Venous cells", 
+                                                        "8" = "ECs (M phase)",
+                                                        "9" = "Venous cells",
+                                                        "10" = "EndoMT cells",
+                                                        "11" = "HECs",
+                                                        "12" = "Arterial ECs",
+                                                        "13" = "Lymphatic ECs",
+                                                        "14" = "Arterial ECs",
+                                                        "15" = "Allantois/Placenta",
+                                                        "16" = "HECs",
+                                                        "17" = "HECs",
+                                                        "18" = "EndoMT cells",
+                                                        "19" = "SM cells",
+                                                        "20" = "EndoMT cells",
+                                                        "21" = "EndoMT cells")
+DimPlot(object = sample,
+        reduction = "umap",
+        group.by = "Cell.Annotations",
+        label = F,
+        repel = T,
+        pt.size = 0.8,
+        label.size = 6,
+        cols = c('HECs' = "#377EB8", 
+                 'Lymphatic ECs' = "#4DAF4A",
+                 'Arterial ECs' = "#E41A1C",
+                 'Venous cells' = "#FF7F00",
+                 'EndoMT cells' = "#E6AB02",
+                 'Allantois/Placenta' = "#984EA3",
+                 'SM cells' = "#A6761D",
+                 "ECs (M phase)" = "#A50026"))
+# ED_Fig6B - ED_Fig6C ------------------------------------------------------------------
+FeaturePlot(object = sample,
+            features = c("RUNX1", "FCGR2B"),
+            cols = c("grey90","blue"),
+            order = T,
+            pt.size = 0.1)
+# ED_Fig6D ------------------------------------------------------------------
+runx1 <- readRDS("Scarfo_HEC2023/scRNAseq/RUNX1_clusters.rds")
+## Monocle3
+Idents(runx1) <- "RNA_snn_res.0.6"
+start = WhichCells(runx1,idents = "0")
+expression_matrix <- runx1@assays[["RNA"]]@counts
+cell_metadata <- runx1@meta.data
+cell_metadata$orig.ident <- factor(cell_metadata$orig.ident, levels = unique(cell_metadata$orig.ident))
+gene_annotation <- data.frame("gene_short_name" = rownames(runx1))
+rownames(gene_annotation) <- gene_annotation$gene_short_name
+cds <- new_cell_data_set(expression_data = expression_matrix, 
+                         cell_metadata = cell_metadata, 
+                         gene_metadata = gene_annotation)
+cds <- preprocess_cds(cds, num_dim = 50)
+cds <- reduce_dimension(cds)
+## Construct and assign the made up partition
+recreate.partition <- c(rep(1, length(cds@colData@rownames)))
+names(recreate.partition) <- cds@colData@rownames
+recreate.partition <- as.factor(recreate.partition)
+cds@clusters@listData[["UMAP"]][["partitions"]] <- recreate.partition
+## Assign the cluster info
+list_cluster <- runx1@meta.data[["RNA_snn_res.0.6"]]
+names(list_cluster) <- runx1@assays[["RNA"]]@data@Dimnames[[2]]
+cds@clusters@listData[["UMAP"]][["clusters"]] <- list_cluster
+cds@clusters@listData[["UMAP"]][["louvain_res"]] <- "NA"
+## Assign UMAP coordinate
+reducedDims(cds)[["UMAP"]] <- runx1@reductions[["umap"]]@cell.embeddings
+### Reset colnames and rownames (consequence of UMAP replacement)
+rownames(cds@principal_graph_aux[['UMAP']]$dp_mst) <- NULL
+## Learn Graph
+cds <- learn_graph(cds = cds,use_partition = T,learn_graph_control=list(ncenter=220,minimal_branch_len=15),verbose = T)
+cds <- order_cells(cds, root_cells = start)
+plot_cells(cds,
+           color_cells_by = "pseudotime",
+           label_groups_by_cluster=FALSE,
+           label_leaves=T,
+           label_branch_points=T,
+           group_label_size = 8,
+           graph_label_size = 3,
+           cell_size = 1,
+           trajectory_graph_segment_size = 1)
+# ED_Fig6E ------------------------------------------------------------------
+cds <- estimate_size_factors(cds)
+gi <- c("H19","KCNK17","RUNX1", "MYB", "SPN")
+cds_gi <- cds[rowData(cds)$gene_short_name %in% gi,]
+plot_genes_in_pseudotime(cds_subset = cds_gi, 
+                         ncol = 3,
+                         color_cells_by = "RNA_snn_res.0.6", 
+                         min_expr = NULL,
+                         cell_size = 1)
+# ED_Fig6F - ED_Fig6G ------------------------------------------------------------------
+## load Seurat data
+sample <- readRDS("Scarfo_HEC2023/scRNAseq/WNTd_HE.rds")
+runx1 <- subset(sample, idents = c("0","1","2","11","16","17"))
+Idents(runx1) <- "RNA_snn_new_res.0.6"
+# preparing data for dyno
+df <- as.matrix(runx1[["RNA"]]@data)
+counts <- Matrix::t(as(as.matrix(runx1@assays$RNA@counts), 'sparseMatrix'))
+expression <- Matrix::t(as(as.matrix(runx1@assays$RNA@data), 'sparseMatrix'))
+dataset_runx1 <- wrap_expression(expression = expression,
+                                 counts = counts)
+dataset_runx1 <- dataset_runx1 %>% 
+  add_prior_information(start_id = "Sample1_ATCTCTATCCAATCCC-1", # from cluster 0
+                        start_n = 0) %>%
+  add_grouping(grouping = runx1$RNA_snn_new_res.0.6) %>%
+  add_dimred(Embeddings(runx1, "umap"))
+# selecting method
+guidelines <- guidelines_shiny(dataset_runx1)
+methods_selected <- guidelines$methods_selected
+methods_selected
+# running top model
+model_paga_t <- infer_trajectory(dataset_runx1, 
+                                 method = ti_paga_tree(), 
+                                 verbose = T,
+                                 give_priors = c("start_id","start_n"))
+# plotting trajectory and pseudotime scores
+patchwork::wrap_plots(
+  plot_dimred(model_paga_t, size_cells = 0.5, grouping = dataset_runx1$grouping) + 
+    ggtitle(" RUNX1 Clusters") +
+    scale_color_manual(values= c("#F8766D","#B79F00","#00BFC4","#619CFF",
+                                 "#F564E3","#00BA38")) +
+    labs(color = "Clusters") +
+    guides(color = guide_legend(override.aes = list(size = 2))),
+  plot_dimred(model_paga_t, size_cells = 0.5, "pseudotime", pseudotime = calculate_pseudotime(model_paga_t)) + 
+    ggtitle("Pseudotime")
+)
--- a/scRNAseq/Fig5_plots.R
+++ b/scRNAseq/Fig5_plots.R
@@ -9,7 +9,7 @@ suppressPackageStartupMessages(library(clusterProfiler))
 ## load Seurat data
 sample <- readRDS("Scarfo_HEC2023/scRNAseq/WNTd_HE.rds")
-# Fig5A ------------------------------------------------------------------
+# Fig4A ------------------------------------------------------------------
 DimPlot(object = sample,
        reduction = "umap",
@@ -72,7 +72,7 @@ DimPlot(object = runx1,
        label.size = 8)
-# Fig5B ----------------------------------------------------------------
+# Fig4B ----------------------------------------------------------------
 ## Monocle3
 Idents(runx1) <- "RNA_snn_res.0.6"
@@ -114,7 +114,7 @@ plot_cells(cds,
           cell_size = 1)
-# Fig5C -------------------------------------------------------------------
+# Fig4C -------------------------------------------------------------------
 ## differential markers
 degs_clu11_vs_clu0_1_2 <- FindMarkers(object = runx1, ident.1 = 11, ident.2 = c(0,1,2), only.pos = FALSE, min.pct = 0, test.use = "wilcox", logfc.threshold = 0, min.cells.group = 0)
@@ -171,7 +171,7 @@ for (i in comp) {
  assign(paste0("p_",i), p)
 }
-# Fig5D -------------------------------------------------------------------
+# Fig4D -------------------------------------------------------------------
 runx1@meta.data[["Phase"]] <- recode_factor(runx1@meta.data[["Phase"]],
                                            "G1" = "G1", 
@@ -243,7 +243,7 @@ for (i in cc.clusters) {
 }
 p_final <- cc_umap / (`p_clu_0,1,2` | p_clu_11 | `p_clu_16,17`)
-# Fig5E -------------------------------------------------------------------
+# Fig4E -------------------------------------------------------------------
 cds <- order_cells(cds, root_cells = start)
 cds <- estimate_size_factors(cds)