Commit 5657b15d authored by Matteo Barcella's avatar Matteo Barcella
Browse files

scRNAseq code - 1st commit

parent 013b769a
# Run each individual library through `cellranger count`.
# The caller is expected to define:
#   library  - bash array of library/sample names, indexed by the function argument
#   fastqdir - value passed to cellranger's --fastqs option
module load cellranger/6.0.1
ref=refdata-cellranger-hg19-3.0.0

# cellwrangler IDX
#   Runs `cellranger count` for the library stored at index IDX of the
#   `library` array, using that name both as the run id and the sample name,
#   and logs a timestamp before and after the run.
cellwrangler () {
    # Look the library name up once; quoted everywhere below so that names or
    # paths containing spaces/glob characters are passed through intact.
    local lib="${library[$1]}"

    echo "Started processing library ${lib} on $(date)"
    cellranger count --id="${lib}" \
        --sample="${lib}" \
        --transcriptome="${ref}" \
        --fastqs="${fastqdir}"
    echo "Finishing processing library ${lib} on $(date)"
}
# Demux donors in donor-multiplexed samples (libraries: SITTB3, SITTC3, SITTG3, SITTE10, SITTF10)
# adjusts the barcode suffix "-1" to "-1-0" and "-1-1" respectively to match suffixes added by scanpy
#
# adjustbam IDX
#   Streams the cellranger BAM of library `libs[IDX]` through sed, appending
#   "-<IDX-1>" to every CB:Z: cell-barcode tag, and writes the result to
#   ${basepath}temp/<library>_adjusted.bam.
#   The caller is expected to define: libs (array), tooldir, basepath.
#   SLURM_NTASKS sets the number of extra samtools threads (defaults to 1 if unset).
adjustbam () {
    local lib="${libs[$1]}"
    # Arithmetic expansion instead of `let`: `let` returns exit status 1 when
    # the expression evaluates to 0 (i.e. IDX=1), which would abort the script
    # under `set -e`. Kept local so it does not leak into the caller's scope.
    local suffix=$(( $1 - 1 ))
    # Without a default, an unset SLURM_NTASKS makes `-@` swallow the next flag.
    local threads="${SLURM_NTASKS:-1}"

    echo "Starting processing INPUT sample ${lib} on $(date)"
    singularity exec "${tooldir}souporcell.sif" \
        samtools view -h "${basepath}${lib}/outs/possorted_genome_bam.bam" \
        | sed -r "s/(CB:Z:[ACTG]+\w+-1)/\1-${suffix}/" \
        | singularity exec "${tooldir}souporcell.sif" \
            samtools view -@ "${threads}" -bSh - > "${basepath}temp/${lib}_adjusted.bam"
    echo "Finished processing INPUT sample ${lib} on $(date)"
}
# How many donors/genotypes are present in the sample to demux
ndonors=2

# demultiplex donors using souporcell
#
# demux IDX
#   Runs souporcell_pipeline.py (inside the souporcell singularity image) on
#   the barcode-adjusted BAM of library `libs[IDX]`, restricted to the
#   filtered barcode list for that library, clustering into ${ndonors} donors.
#   The caller is expected to define: libs (array), tooldir, workdir, basedir.
#   SLURM_NTASKS sets the thread count (defaults to 1 if unset).
#   NOTE(review): the input is read from ${workdir}processed/temp/ — presumably
#   the same directory adjustbam writes to (${basepath}temp/); confirm.
demux () {
    # The original log lines printed "$lib", which is not defined in the code
    # shown here; use the library actually being processed.
    local lib="${libs[$1]}"
    local threads="${SLURM_NTASKS:-1}"

    echo "Started processing library ${lib} on $(date)"
    singularity exec "${tooldir}souporcell.sif" souporcell_pipeline.py \
        -i "${workdir}processed/temp/${lib}_adjusted.bam" \
        -b "${workdir}notebooks/output/${lib}_filtered_barcodes.txt" \
        -f "${basedir}references/10x/cellranger3x/refdata-cellranger-hg19-3.0.0/fasta/genome.fa" \
        -o "${workdir}processed/souporcell/${lib}" \
        -t "${threads}" \
        -k "${ndonors}"
    echo "Finished processing library ${lib} on $(date)"
}
# Integrate all donors with Seurat: per-donor SCTransform (with cell-cycle
# scores regressed out in a second pass), anchor finding and data integration.
# Intermediate anchors and the integrated object are written as date-stamped
# .rds files under `basedir`.
prefix <- format(as.Date(Sys.time()), "%Y%m%d")
basename <- "_bthalcombo_v6_preGT_plus_6x_healthy_"
basedir <- "."
# -----------------------------------------------------------------------------------
library(Seurat)
library(future)
library(ggplot2)
library(cowplot)
suppressMessages(library(dplyr))
plan(strategy = "multicore", workers = 10)
options(future.globals.maxSize = +Inf) # (1024 * 32) * 1024^2 )
# -------------------------------------------------------------------------------------
# Paths are built with file.path(): pasting basedir ("." here) directly onto a
# file name yields ".<file>" (a hidden file in the working directory) rather
# than "./<file>".
data <- readRDS(
  file.path(basedir, "20230704_bthalcombo6_preGT_plus_6x_healthy_controls_counts_Seurat_obj.rds")
)
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes

Sys.time()
print("Preprocessing")
# One Seurat object per donor.
data.list <- SplitObject(data, split.by = "donor")
# First SCTransform pass: provides the SCT assay used for cell-cycle scoring.
data.list <- lapply(
  X = data.list, FUN = SCTransform,
  vars.to.regress = c("nCount_RNA", "mitoc_fraction"),
  vst.flavor = "v2"
)
data.list <- lapply(
  X = data.list, FUN = CellCycleScoring,
  s.features = s.genes,
  g2m.features = g2m.genes,
  assay = "SCT"
)
# Second SCTransform pass: additionally regress out the S and G2M scores
# computed above.
data.list <- lapply(
  X = data.list, FUN = SCTransform,
  vars.to.regress = c("nCount_RNA", "mitoc_fraction", "S.Score", "G2M.Score"),
  vst.flavor = "v2"
)

Sys.time()
print("Selecting features")
features <- SelectIntegrationFeatures(object.list = data.list, nfeatures = 3000)
data.list <- PrepSCTIntegration(object.list = data.list, anchor.features = features)

Sys.time()
print("Selecting anchors")
data.anchors <- FindIntegrationAnchors(
  object.list = data.list,
  normalization.method = "SCT",
  anchor.features = features
)
# Save the anchors so the integration step can be re-run without recomputing them.
saveRDS(
  data.anchors,
  file = file.path(basedir, paste0(prefix, basename, "SCT_anchors.rds"))
)

Sys.time()
print("Integrating datasets...")
data.combined.sct <- IntegrateData(anchorset = data.anchors, normalization.method = "SCT")
Sys.time()
saveRDS(
  data.combined.sct,
  file = file.path(basedir, paste0(prefix, basename, "SCT_integrated.rds"))
)
Sys.time()
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
# Fit tradeSeq GAMs along the erythroid(+) trajectory, using palantir
# pseudotime from the AnnData object and bthal vs healthy as conditions.
# The fitted object is written to ../output/ as an .rds file.
prefix <- format(as.Date(Sys.time()), "%Y%m%d")
prefix
basedir <- "."
library(tradeSeq)
library(RColorBrewer)
library(SingleCellExperiment)

ann <- anndata::read_h5ad("../h5ad/20231122_bthalcombo_v6c_cr2_no_B_palantir_pseudotime_kernel.h5ad")

# Cells belonging to the trajectory (one identifier per line, no header).
eryidx <- read.table(
  file = "../notebooks/output/20231128_bthalcombo_v6c_eryplus_traj_cell_idx.txt",
  header = FALSE
)
eryidx <- as.vector(eryidx$V1)

traj_obs <- ann[eryidx]$obs
pseudotime <- traj_obs$palantir_pseudotime

# Donor -> condition lookup.
conv <- c(
  "bthal005" = "bthal", "bthal006" = "bthal", "healthy_CTRL" = "healthy",
  "bthal010" = "bthal", "bthal009" = "bthal", "bthal007" = "bthal",
  "bthal008" = "bthal", "P185" = "healthy", "P181" = "healthy",
  "P257" = "healthy", "paedBM1" = "healthy", "paedBM2" = "healthy"
)
donor <- as.character(traj_obs$donor)
condition <- unname(conv[match(donor, names(conv))])
# A donor missing from the lookup would otherwise become an NA condition and
# be dropped silently by factor() below.
unmapped <- unique(donor[is.na(condition)])
if (length(unmapped) > 0) {
  stop("No condition defined for donor(s): ", paste(unmapped, collapse = ", "),
       call. = FALSE)
}

# file.path() rather than paste0(): pasting basedir ("." here) directly onto
# "h5ad/" yields ".h5ad/...".
# NOTE(review): the AnnData file above is read from "../h5ad/" — confirm that
# basedir points at the intended directory.
sce <- readRDS(file.path(basedir, "h5ad", "20230815_bthalcombo_v6b_counts_sf_SCE_obj.rds"))
sce <- sce[, eryidx]
stopifnot(length(pseudotime) == ncol(sce))
cw <- rep(1, ncol(sce)) # cell weights
rm(ann)

set.seed(42)
Sys.time()
sceGAM <- fitGAM(
  counts = as.matrix(counts(sce)),
  conditions = factor(condition),
  pseudotime = pseudotime,
  cellWeights = cw,
  nknots = 5,
  verbose = TRUE
  #parallel=TRUE,
  #BPPARAM = BPPARAM
)
Sys.time()
saveRDS(sceGAM, file = "../output/20231129_sceGAM_erytroid_plus_traj_conditions.rds")
# Fit tradeSeq GAMs along the monocyte(+) trajectory, using palantir
# pseudotime from the AnnData object and bthal vs healthy as conditions.
# The fitted object is written to ../output/ as an .rds file.
prefix <- format(as.Date(Sys.time()), "%Y%m%d")
prefix
basedir <- "."
library(tradeSeq)
library(RColorBrewer)
library(SingleCellExperiment)

ann <- anndata::read_h5ad("../h5ad/20231122_bthalcombo_v6c_cr2_no_B_palantir_pseudotime_kernel.h5ad")

# Cells belonging to the trajectory (one identifier per line, no header).
monoidx <- read.table(
  file = "../notebooks/output/20231207_bthalcombo_v6c_monoplus_traj_cell_idx_CLEAN.txt",
  header = FALSE
)
monoidx <- as.vector(monoidx$V1)

traj_obs <- ann[monoidx]$obs
pseudotime <- traj_obs$palantir_pseudotime

# Donor -> condition lookup.
conv <- c(
  "bthal005" = "bthal", "bthal006" = "bthal", "healthy_CTRL" = "healthy",
  "bthal010" = "bthal", "bthal009" = "bthal", "bthal007" = "bthal",
  "bthal008" = "bthal", "P185" = "healthy", "P181" = "healthy",
  "P257" = "healthy", "paedBM1" = "healthy", "paedBM2" = "healthy"
)
donor <- as.character(traj_obs$donor)
condition <- unname(conv[match(donor, names(conv))])
# A donor missing from the lookup would otherwise become an NA condition and
# be dropped silently by factor() below.
unmapped <- unique(donor[is.na(condition)])
if (length(unmapped) > 0) {
  stop("No condition defined for donor(s): ", paste(unmapped, collapse = ", "),
       call. = FALSE)
}

# file.path() rather than paste0(): pasting basedir ("." here) directly onto
# "h5ad/" yields ".h5ad/...".
# NOTE(review): the AnnData file above is read from "../h5ad/" — confirm that
# basedir points at the intended directory.
sce <- readRDS(file.path(basedir, "h5ad", "20230815_bthalcombo_v6b_counts_sf_SCE_obj.rds"))
sce <- sce[, monoidx]
stopifnot(length(pseudotime) == ncol(sce))
cw <- rep(1, ncol(sce)) # cell weights
rm(ann)

set.seed(42)
Sys.time()
sceGAM <- fitGAM(
  counts = as.matrix(counts(sce)),
  conditions = factor(condition),
  pseudotime = pseudotime,
  cellWeights = cw,
  nknots = 6,
  verbose = TRUE
  #parallel=TRUE,
  #BPPARAM = BPPARAM
)
Sys.time()
saveRDS(sceGAM, file = "../output/20231207_sceGAM_mono_plus_traj_conditions.rds")
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
{
"cells": [
{
"cell_type": "markdown",
"id": "a223e20f-5403-4aa0-935e-214df220ad93",
"metadata": {},
"source": [
"# bthal dataset v6b - collate and export diff expr results"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "02b5201c-bc31-4361-a57f-f58d95adae62",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ipywidgets: 8.1.2\n",
"matplotlib: 3.7.4\n",
"scanpy : 1.9.8\n",
"seaborn : 0.13.0\n",
"sys : 3.8.10 (default, Nov 22 2023, 10:22:35) \n",
"[GCC 9.4.0]\n",
"pandas : 2.0.3\n",
"numpy : 1.24.4\n",
"json : 2.0.9\n",
"\n"
]
}
],
"source": [
"%matplotlib widget\n",
"%load_ext watermark\n",
"\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"import os, sys, json, operator, getpass\n",
"from pathlib import Path\n",
"from datetime import datetime\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import scanpy as sc\n",
"\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"import ipywidgets as widgets\n",
"\n",
"\n",
"sc.settings.verbosity = 3 # show some output\n",
"sc.settings.file_format_figs = 'svg' # set this to 'svg' (notebook) or 'pdf' (files) if you want vector graphics\n",
"sc.settings.savefigs = False\n",
"\n",
"# plt.rcParams['font.family'] = 'sans-serif'\n",
"# plt.rcParams['font.sans-serif'] = 'Arial'\n",
"# plt.rc('font', size=14)\n",
"\n",
"home = str(Path.home())\n",
"user = getpass.getuser()\n",
"\n",
"%watermark --iversions"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2389390c-ab9a-44da-84a4-df73cce20e25",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 3,
"id": "4c834193-f1a4-483c-a1bc-7bdfc92dd0b6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'proteus.45g'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open('/.singularity.d/labels.json') as fh:\n",
" singularity = json.load(fh)\n",
" \n",
"singularity['Version']"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "7696e6b1-47d9-4d33-95c5-f142508734bb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"20240412\n"
]
}
],
"source": [
"now = datetime.now()\n",
"prefix = now.strftime('%Y%m%d')\n",
"print(prefix)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "78143475-18b4-41df-a0ed-8c466f8c5573",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "8efd3dd9-2512-447c-aad5-527eec19241a",
"metadata": {},
"source": [
"### collect dex file list"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "c9a30e75-e0db-4579-84f9-a8fe2fe55f86",
"metadata": {},
"outputs": [],
"source": [
"files = sorted(os.listdir(os.path.join(basedir, 'dex')))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a37bcfa0-cdca-4a2b-8bee-26b7ea98b48a",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 8,
"id": "4ec3ee28-32ce-482f-bd65-e15740470274",
"metadata": {},
"outputs": [],
"source": [
"import re"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "a6e7db93-7da6-49b1-a3c5-e4957251572a",
"metadata": {},
"outputs": [],
"source": [
"patt = re.compile('^\\d{8}\\_bthalcombo\\_v6b\\_(.+)\\_bthal\\_vs\\_healthy\\_sig\\_genes\\_DESeq2\\_pb.tsv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fed11c9d-3973-4170-87c1-2450a6b54fc9",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 10,
"id": "3a0ba3b0-6e7c-4e7d-a13e-9d6881e82fe7",
"metadata": {},
"outputs": [],
"source": [
"repl = {'C10': 'Mende', 'C45': 'Zeng', 'strict': 'pheno'}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3e2b998b-3c42-4643-957e-5eb1c64b0b5c",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "d6defa3b-0571-477a-8581-fae088b721ee",
"metadata": {},
"source": [
"### parse dex files"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "88bda9da-e678-467c-89a4-b06a92464f9c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Mende_cluster_02-MEP2\n",
"Mende_cluster_04-HSC-MPP1\n",
"Mende_cluster_05-HSC-MPP3\n",
"Mende_cluster_08-EryP\n",
"Mende_cluster_10-MPP-to-MEP\n",
"Mende_cluster_12-MEP1\n",
"Mende_lineage_HSC-MPP\n",
"Mende_lineage_MDP\n",
"Mende_lineage_early-MEMBP\n",
"Mende_lineage_late-MEMBP\n",
"Mende_lineage_late-MyP\n",
"Mende_lineage_primed-MPP\n",
"Zeng_cluster_BFU-E\n",
"Zeng_cluster_CFU-E\n",
"Zeng_cluster_GMP-Mono\n",
"Zeng_lineage_Erytroid\n",
"Zeng_lineage_HSC\n",
"Zeng_lineage_MDP\n",
"Zeng_lineage_MEP\n",
"Zeng_lineage_MPP-MkEry\n",
"Zeng_lineage_MPP-MyLy\n",
"pheno_HSCMPP_population\n",
"22\n",
"22\n"
]
}
],
"source": [
"dex = {}\n",
"counter = 0\n",
"\n",
"for f in files:\n",
" m = patt.search(f)\n",
" if m:\n",
" temp = m.group(1)\n",
" temp = temp.split('_')\n",
" temp = [ repl[temp[0]] ] + temp[1:]\n",
" comp = '_'.join(temp)\n",
" \n",
" dex[comp] = pd.read_csv('../dex/'+f, sep='\\t')\n",
" counter += 1\n",
" print(comp)\n",
" \n",
"print(len(files))\n",
"print(counter)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "52bfc29d-b5e6-4304-ab41-bd4344d82339",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "bdf67b7f-f74e-4d13-8ad7-63d482aaa2f7",
"metadata": {},
"source": [
"### make summary"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "a165f92a-7635-4b6d-8fbe-8ead957f8128",
"metadata": {},
"outputs": [],
"source": [
"summary = []"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "58a10438-2e17-47d9-8767-7fdaad012c51",
"metadata": {},
"outputs": [],
"source": [
"for k in dex.keys():\n",
" summary.append([k, dex[k].shape[0], str(sum( dex[k].log2FoldChange > 0 )), str(sum( dex[k].log2FoldChange < 0 ))])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b7c8e5aa-6b54-461e-8238-b27ad8eaa4ac",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 27,
"id": "418fca10-d67b-43a2-ade9-d479ed88cc4c",
"metadata": {},
"outputs": [],
"source": [
"summary = pd.DataFrame(summary, columns=['Subset', 'Total', 'Upregulated', 'Downregulated'])"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "8bc43c6b-5021-4db7-bf47-aa21c58d4e69",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Subset</th>\n",
" <th>Total</th>\n",
" <th>Upregulated</th>\n",
" <th>Downregulated</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Mende_cluster_02-MEP2</td>\n",
" <td>15</td>\n",
" <td>1</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Mende_cluster_04-HSC-MPP1</td>\n",
" <td>103</td>\n",
" <td>25</td>\n",
" <td>78</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Mende_cluster_05-HSC-MPP3</td>\n",
" <td>295</td>\n",
" <td>122</td>\n",
" <td>173</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Mende_cluster_08-EryP</td>\n",
" <td>14</td>\n",
" <td>3</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Mende_cluster_10-MPP-to-MEP</td>\n",
" <td>52</td>\n",
" <td>7</td>\n",
" <td>45</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Mende_cluster_12-MEP1</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Mende_lineage_HSC-MPP</td>\n",
" <td>356</td>\n",
" <td>153</td>\n",
" <td>203</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Mende_lineage_MDP</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Mende_lineage_early-MEMBP</td>\n",
" <td>17</td>\n",
" <td>2</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Mende_lineage_late-MEMBP</td>\n",
" <td>22</td>\n",
" <td>3</td>\n",
" <td>19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Mende_lineage_late-MyP</td>\n",
" <td>76</td>\n",
" <td>10</td>\n",
" <td>66</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Mende_lineage_primed-MPP</td>\n",
" <td>52</td>\n",
" <td>7</td>\n",
" <td>45</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Zeng_cluster_BFU-E</td>\n",
" <td>9</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>Zeng_cluster_CFU-E</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>Zeng_cluster_GMP-Mono</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>Zeng_lineage_Erytroid</td>\n",
" <td>21</td>\n",
" <td>1</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>Zeng_lineage_HSC</td>\n",
" <td>272</td>\n",
" <td>115</td>\n",
" <td>157</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>Zeng_lineage_MDP</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>Zeng_lineage_MEP</td>\n",
" <td>33</td>\n",
" <td>5</td>\n",
" <td>28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>Zeng_lineage_MPP-MkEry</td>\n",
" <td>104</td>\n",
" <td>26</td>\n",
" <td>78</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>Zeng_lineage_MPP-MyLy</td>\n",
" <td>136</td>\n",
" <td>35</td>\n",
" <td>101</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>pheno_HSCMPP_population</td>\n",
" <td>3037</td>\n",
" <td>1465</td>\n",
" <td>1572</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Subset Total Upregulated Downregulated\n",
"0 Mende_cluster_02-MEP2 15 1 14\n",
"1 Mende_cluster_04-HSC-MPP1 103 25 78\n",
"2 Mende_cluster_05-HSC-MPP3 295 122 173\n",
"3 Mende_cluster_08-EryP 14 3 11\n",
"4 Mende_cluster_10-MPP-to-MEP 52 7 45\n",
"5 Mende_cluster_12-MEP1 5 0 5\n",
"6 Mende_lineage_HSC-MPP 356 153 203\n",
"7 Mende_lineage_MDP 0 0 0\n",
"8 Mende_lineage_early-MEMBP 17 2 15\n",
"9 Mende_lineage_late-MEMBP 22 3 19\n",
"10 Mende_lineage_late-MyP 76 10 66\n",
"11 Mende_lineage_primed-MPP 52 7 45\n",
"12 Zeng_cluster_BFU-E 9 1 8\n",
"13 Zeng_cluster_CFU-E 6 2 4\n",
"14 Zeng_cluster_GMP-Mono 4 0 4\n",
"15 Zeng_lineage_Erytroid 21 1 20\n",
"16 Zeng_lineage_HSC 272 115 157\n",
"17 Zeng_lineage_MDP 6 0 6\n",
"18 Zeng_lineage_MEP 33 5 28\n",
"19 Zeng_lineage_MPP-MkEry 104 26 78\n",
"20 Zeng_lineage_MPP-MyLy 136 35 101\n",
"21 pheno_HSCMPP_population 3037 1465 1572"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"summary"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f964c8e5-1b21-438e-816a-a131b920cf98",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 40,
"id": "3f5b8eb3-1354-415c-9158-39d89da675da",
"metadata": {},
"outputs": [],
"source": [
"# summary.to_excel('../output/20240412_DESEq2_pseudobulk_bthal_vs_healthy_comparisons.xlsx', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7c07f0b6-5c92-46dc-8da2-6f912f78ef47",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "99902890-81d5-4539-baeb-1a4729bd9652",
"metadata": {},
"source": [
"### write out dex results (1 subset per tab)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "603f4143-dab0-4eb9-b226-b1ee6abe2925",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys(['Mende_cluster_02-MEP2', 'Mende_cluster_04-HSC-MPP1', 'Mende_cluster_05-HSC-MPP3', 'Mende_cluster_08-EryP', 'Mende_cluster_10-MPP-to-MEP', 'Mende_cluster_12-MEP1', 'Mende_lineage_HSC-MPP', 'Mende_lineage_MDP', 'Mende_lineage_early-MEMBP', 'Mende_lineage_late-MEMBP', 'Mende_lineage_late-MyP', 'Mende_lineage_primed-MPP', 'Zeng_cluster_BFU-E', 'Zeng_cluster_CFU-E', 'Zeng_cluster_GMP-Mono', 'Zeng_lineage_Erytroid', 'Zeng_lineage_HSC', 'Zeng_lineage_MDP', 'Zeng_lineage_MEP', 'Zeng_lineage_MPP-MkEry', 'Zeng_lineage_MPP-MyLy', 'pheno_HSCMPP_population'])"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dex.keys()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f3a53b3d-21e0-4d34-9b8c-1224fad79e4e",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "3e8074ee-679d-447a-bc5b-030a2a43a7da",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 24,
"id": "151f34e9-7fd6-4185-a933-b48db352093b",
"metadata": {},
"outputs": [],
"source": [
"# https://stackoverflow.com/questions/21981820/creating-multiple-excel-worksheets-using-data-from-a-pandas-dataframe"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "fc99c873-f8b1-469d-a712-ed6a168d0bd7",
"metadata": {},
"outputs": [],
"source": [
"with pd.ExcelWriter('../output/20240412_DESEq2_pseudobulk_bthal_vs_healthy_comparisons.xlsx', engine='openpyxl') as writer:\n",
" \n",
" summary.to_excel(writer, sheet_name='Summary', index=False)\n",
" \n",
" for k in dex.keys():\n",
" dex[k].to_excel(writer, sheet_name=k, index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "918ab08c-d25e-4d2d-950a-aa23523200be",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 39,
"id": "c6b16cee-95b9-45e3-bfd6-8c816ac08401",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>gene</th>\n",
" <th>baseMean</th>\n",
" <th>log2FoldChange</th>\n",
" <th>lfcSE</th>\n",
" <th>pvalue</th>\n",
" <th>padj</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>IFI6</td>\n",
" <td>2701.297655</td>\n",
" <td>-2.393732</td>\n",
" <td>0.038346</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>GADD45A</td>\n",
" <td>1606.359692</td>\n",
" <td>-2.596506</td>\n",
" <td>0.051663</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>IFI44L</td>\n",
" <td>2107.963029</td>\n",
" <td>-2.474058</td>\n",
" <td>0.043739</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>MCL1</td>\n",
" <td>4403.662831</td>\n",
" <td>-1.206698</td>\n",
" <td>0.030463</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>ID2</td>\n",
" <td>2049.391917</td>\n",
" <td>-3.285105</td>\n",
" <td>0.049060</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3032</th>\n",
" <td>AKT3</td>\n",
" <td>235.660229</td>\n",
" <td>-0.228459</td>\n",
" <td>0.117286</td>\n",
" <td>0.009927</td>\n",
" <td>0.049581</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3033</th>\n",
" <td>LGALS3</td>\n",
" <td>11.347210</td>\n",
" <td>0.485461</td>\n",
" <td>0.581906</td>\n",
" <td>0.009935</td>\n",
" <td>0.049607</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3034</th>\n",
" <td>PTGR1</td>\n",
" <td>74.282079</td>\n",
" <td>0.326648</td>\n",
" <td>0.222420</td>\n",
" <td>0.009967</td>\n",
" <td>0.049750</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3035</th>\n",
" <td>CENPH</td>\n",
" <td>431.522818</td>\n",
" <td>-0.182787</td>\n",
" <td>0.086405</td>\n",
" <td>0.009979</td>\n",
" <td>0.049791</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3036</th>\n",
" <td>NXPE3</td>\n",
" <td>151.781287</td>\n",
" <td>-0.259021</td>\n",
" <td>0.143365</td>\n",
" <td>0.010015</td>\n",
" <td>0.049958</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3037 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" gene baseMean log2FoldChange lfcSE pvalue padj\n",
"0 IFI6 2701.297655 -2.393732 0.038346 0.000000 0.000000\n",
"1 GADD45A 1606.359692 -2.596506 0.051663 0.000000 0.000000\n",
"2 IFI44L 2107.963029 -2.474058 0.043739 0.000000 0.000000\n",
"3 MCL1 4403.662831 -1.206698 0.030463 0.000000 0.000000\n",
"4 ID2 2049.391917 -3.285105 0.049060 0.000000 0.000000\n",
"... ... ... ... ... ... ...\n",
"3032 AKT3 235.660229 -0.228459 0.117286 0.009927 0.049581\n",
"3033 LGALS3 11.347210 0.485461 0.581906 0.009935 0.049607\n",
"3034 PTGR1 74.282079 0.326648 0.222420 0.009967 0.049750\n",
"3035 CENPH 431.522818 -0.182787 0.086405 0.009979 0.049791\n",
"3036 NXPE3 151.781287 -0.259021 0.143365 0.010015 0.049958\n",
"\n",
"[3037 rows x 6 columns]"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dex[k]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "15c1507e-70bc-4a8f-8666-2aa08ee1b046",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "420817ea-4e13-42c6-a71b-2357dbe914a8",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "601e1302-91ec-491e-ae58-b5863013de2a",
"metadata": {},
"source": [
"# END"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.8"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}
The main steps of the upstream scRNA-seq analysis comprised cellranger count processing and souporcell (for demultiplexing samples that were sequenced together).
Standard QC and filtering steps followed, leading to a midstream alignment/integration of all samples using Seurat (v4). For this, each library was SCTransformed, with count number, mitochondrial fraction, S score and G2M score regressed out, before integrating all samples via CCA. Downstream analyses included annotation transfer using both Azimuth and Symphony, and trajectory analysis using CellRank (v2) and tradeSeq.
- 00_* : helper wrappers for demultiplexing, alignment and quantification of samples
- 06b_* : script to align all samples using Seurat (v4)
- 07[h|i]* : notebooks for annotation of the integrated/aligned dataset
- 07k4* : notebook for pseudobulk differential expression testing with DESeq2
- 07m* : notebooks and scripts for defining and analysing trajectories using Cellrank (v2) and tradeSeq
- 07n* : notebook defining Subset1-like cells
- 07o* : notebooks to produce specific figures and table for the manuscript
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment