Commit 5657b15d authored by Matteo Barcella's avatar Matteo Barcella
Browse files

scRNAseq code - 1st commit

parent 013b769a
# run each individual library on cellranger
module load cellranger/6.0.1
ref=refdata-cellranger-hg19-3.0.0
cellwrangler () {
echo "Started processing library ${library[$1]} on `date`"
cellranger count --id=${library[$1]} \
--sample=${library[$1]} \
--transcriptome=${ref} \
--fastqs=${fastqdir}
echo "Finishing processing library ${library[$1]} on `date`"
}
# Demux donors in donor-multiplexed samples (libraries: SITTB3, SITTC3, SITTG3, SITTE10, SITTF10)
# adjusts the barcode suffix "-1" to "-1-0" and "-1-1" respectively to match suffixes added by scanpy
adjustbam () {
echo "Starting processing INPUT sample ${libs[$1]} on `date`"
let suffix=$1-1
singularity exec ${tooldir}souporcell.sif samtools view -h ${basepath}${libs[$1]}/outs/possorted_genome_bam.bam \
| sed -r "s/(CB:Z:[ACTG]+\w+-1)/\1-${suffix}/" | \
singularity exec ${tooldir}souporcell.sif \
samtools view -@ ${SLURM_NTASKS} -bSh - > ${basepath}temp/${libs[$1]}_adjusted.bam
echo "Finished processing INPUT sample ${libs[$1]} on `date`"
}
# How many donors/genotypes are present in the sample to demux
ndonors=2
# demultiplex donors using souporcell
demux () {
echo "Started processing library $lib on `date`"
singularity exec ${tooldir}souporcell.sif souporcell_pipeline.py \
-i ${workdir}processed/temp/${libs[$1]}_adjusted.bam \
-b ${workdir}notebooks/output/${libs[$1]}_filtered_barcodes.txt \
-f ${basedir}references/10x/cellranger3x/refdata-cellranger-hg19-3.0.0/fasta/genome.fa \
-o ${workdir}processed/souporcell/${libs[$1]} \
-t ${SLURM_NTASKS} \
-k ${ndonors}
echo "Finished processing library $lib on `date`"
}
prefix <- format(as.Date(Sys.time()), "%Y%m%d")
basename <- '_bthalcombo_v6_preGT_plus_6x_healthy_'
basedir <- '.'
# -----------------------------------------------------------------------------------
library(Seurat)
library(future)
library(ggplot2)
library(cowplot)
suppressMessages(library(dplyr))
plan(strategy = "multicore", workers = 10)
options(future.globals.maxSize = +Inf) # (1024 * 32) * 1024^2 )
# -------------------------------------------------------------------------------------
data <- readRDS( paste0(basedir, '20230704_bthalcombo6_preGT_plus_6x_healthy_controls_counts_Seurat_obj.rds') )
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
Sys.time()
print("Preprocessing")
data.list <- SplitObject(data, split.by = "donor")
data.list <- lapply(X = data.list, FUN = SCTransform, vars.to.regress = c("nCount_RNA", "mitoc_fraction"), vst.flavor = "v2")
data.list <- lapply(X = data.list, FUN = CellCycleScoring, s.features = s.genes,
g2m.features = g2m.genes,
assay = 'SCT' )
data.list <- lapply(X = data.list, FUN = SCTransform,
vars.to.regress = c("nCount_RNA", "mitoc_fraction", 'S.Score', 'G2M.Score'), vst.flavor = "v2")
Sys.time()
print("Selecting features")
features <- SelectIntegrationFeatures(object.list = data.list, nfeatures = 3000)
data.list <- PrepSCTIntegration(object.list = data.list, anchor.features = features)
Sys.time()
print("Selecting anchors")
data.anchors <- FindIntegrationAnchors(object.list = data.list,
normalization.method = "SCT",
anchor.features = features)
saveRDS(file = paste0(basedir, prefix, basename, 'SCT_anchors.rds'), data.anchors )
Sys.time()
print("Integrating datasets...")
data.combined.sct <- IntegrateData(anchorset = data.anchors, normalization.method = "SCT")
Sys.time()
saveRDS(file = paste0(basedir, prefix, basename, 'SCT_integrated.rds'), data.combined.sct )
Sys.time()
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
prefix <- format(as.Date(Sys.time()), "%Y%m%d")
prefix
basedir <- "."
library(tradeSeq)
library(RColorBrewer)
library(SingleCellExperiment)
ann <- anndata::read_h5ad('../h5ad/20231122_bthalcombo_v6c_cr2_no_B_palantir_pseudotime_kernel.h5ad')
eryidx <- read.table(file='../notebooks/output/20231128_bthalcombo_v6c_eryplus_traj_cell_idx.txt', header = FALSE)
eryidx <- as.vector(eryidx$V1)
pseudotime <- ann[eryidx]$obs$palantir_pseudotime
conv <- t( data.frame("bthal005" = "bthal", "bthal006" = "bthal", "healthy_CTRL" = "healthy", "bthal010" = "bthal",
"bthal009" = "bthal", "bthal007" = "bthal", "bthal008" = "bthal", "P185" = "healthy",
"P181" = "healthy", "P257" = "healthy", "paedBM1" = "healthy", "paedBM2" = "healthy") )
condition <- conv[ match(ann[eryidx]$obs$donor, rownames(conv)) ]
sce <- readRDS( paste0(basedir, 'h5ad/', '20230815_bthalcombo_v6b_counts_sf_SCE_obj.rds') )
sce <- sce[,eryidx]
cw <- rep(1,ncol(sce)) # cell weights
rm(ann)
set.seed(42)
Sys.time()
sceGAM <- fitGAM(counts = as.matrix( counts(sce) ),
conditions = factor(condition),
pseudotime=pseudotime,
cellWeights=cw,
nknots=5,
verbose=T
#parallel=TRUE,
#BPPARAM = BPPARAM
)
Sys.time()
saveRDS(file="../output/20231129_sceGAM_erytroid_plus_traj_conditions.rds", sceGAM)
prefix <- format(as.Date(Sys.time()), "%Y%m%d")
prefix
basedir <- '.'
library(tradeSeq)
library(RColorBrewer)
library(SingleCellExperiment)
ann <- anndata::read_h5ad('../h5ad/20231122_bthalcombo_v6c_cr2_no_B_palantir_pseudotime_kernel.h5ad')
monoidx <- read.table(file='../notebooks/output/20231207_bthalcombo_v6c_monoplus_traj_cell_idx_CLEAN.txt', header = FALSE)
monoidx <- as.vector(monoidx$V1)
pseudotime <- ann[monoidx]$obs$palantir_pseudotime
conv <- t( data.frame("bthal005" = "bthal", "bthal006" = "bthal", "healthy_CTRL" = "healthy", "bthal010" = "bthal",
"bthal009" = "bthal", "bthal007" = "bthal", "bthal008" = "bthal", "P185" = "healthy",
"P181" = "healthy", "P257" = "healthy", "paedBM1" = "healthy", "paedBM2" = "healthy") )
condition <- conv[ match(ann[monoidx]$obs$donor, rownames(conv)) ]
sce <- readRDS( paste0(basedir, 'h5ad/', '20230815_bthalcombo_v6b_counts_sf_SCE_obj.rds') )
sce <- sce[,monoidx]
cw <- rep(1,ncol(sce)) # cell weights
rm(ann)
set.seed(42)
Sys.time()
sceGAM <- fitGAM(counts = as.matrix( counts(sce) ),
conditions = factor(condition),
pseudotime=pseudotime,
cellWeights=cw,
nknots=6,
verbose=T
#parallel=TRUE,
#BPPARAM = BPPARAM
)
Sys.time()
saveRDS(file="../output/20231207_sceGAM_mono_plus_traj_conditions.rds", sceGAM)
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
The scRNAseq upstream analysis main steps comprised of cellranger count processing and souporcell (for demultiplexing the samples sequenced together).
Standard QC and filtering steps followed, leading to a midstream aligment/integration of all samples using Seurat (v4). For that each library was SCtransformed, with count number, mitochondrial fraction, S score and G2M score regressed before integrating all samples via CCA. Downstream analyses included annotation transfer using both Azimuth and Symphony, and trajectory analysis using Cellrank (v2) and tradeSeq.
- 00_* : helper wrappers for demultiplexing, alignment and quantification of samples
- 06b_* : script to align all samples using Seurat (v4)
- 07[h|i]* : notebooks for annotation of the integrated/aligned dataset
- 07k4* : notebook for pseudobulk differential expression testing with DESeq2
- 07m* : notebooks and scripts for defining and analysing trajectiories using Cellrank (v2) and tradeSeq
- 07n* : notebook defining Subset1-like cells
- 07o* : notebooks to produce specific figures and table for the manuscript
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment