Commit 0c7d82d3 authored by Stefano Beretta's avatar Stefano Beretta
Browse files

Added Paper Result script.

parent 918c0314
.DS_Store
results/ results/
...@@ -6,7 +6,7 @@ library(openxlsx) ...@@ -6,7 +6,7 @@ library(openxlsx)
### General Settings ### ### General Settings ###
######################## ########################
# Working dir # Working dir
wdir <- "~/Downloads/TIGET/Squadrito/scSquadrito/squadrito_livertumor2022_scrnaseq" wdir <- "squadrito_livertumor2022_scrnaseq"
# Data dir # Data dir
data_dir <- paste(wdir, "data", sep = "/") data_dir <- paste(wdir, "data", sep = "/")
# Results dir # Results dir
......
library(Seurat)
library(fgsea)
library(clusterProfiler)
library(dplyr)
### Function Heatmap ###
doSCHeatmapMeanCorrect <- function(obj, clustering, ntop, num_color, high_genes, clu_order = NULL) {
require(Seurat)
require(ggplot2)
require(ComplexHeatmap)
require(dplyr)
require(circlize)
require(RColorBrewer)
require(gridExtra)
require(cowplot)
require(stringr)
require(scales)
col_scales <- list(colorRampPalette(rev(brewer.pal(n = 9, name ="RdBu")))(100),
colorRamp2(c(-4, -3, -2, -1, 0, 1, 2, 3, 4), rev(brewer.pal(n = 9, name ="RdBu"))),
colorRamp2(c(-2, -1.5, -1, -0.5, 0, 0.5, 1, 1.5, 2), rev(brewer.pal(n = 9, name ="RdBu"))),
colorRamp2(c(-1.5, -1, -0.5, 0, 0.5, 1, 1.5), rev(brewer.pal(n = 7, name ="RdBu"))),
colorRamp2(c(-1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1), rev(brewer.pal(n = 9, name ="RdBu"))))
if (num_color > length(col_scales)) {
col_sc <- col_scales[[length(col_scales)]]
} else {
col_sc <- col_scales[[num_color]]
}
clu_order_alp <- sort(unique(as.character(obj@meta.data[[clustering]])))
clu_cols_seurat <- hue_pal()(length(clu_order_alp))
names(clu_cols_seurat) <- clu_order_alp
if (!is.null(clu_order)) {
clu_order_alp <- clu_order
}
top_genes <- as.data.frame(obj@misc$markers[[clustering]] %>% group_by(cluster) %>% top_n(ntop, avg_log2FC))
top_genes$cluster <- as.character(top_genes$cluster)
top_genes <- dplyr::left_join(data.frame(cluster = clu_order_alp), top_genes, by = "cluster")
top_genes$cluster <- factor(top_genes$cluster, levels = clu_order_alp)
print(top_genes$gene)
if (nrow(obj@assays$RNA@scale.data) == 0) {
hm_data <- as.matrix(obj@assays$RNA@data[top_genes$gene,])
hm_data <- t(scale(t(hm_data)))
} else {
hm_data <- obj@assays$RNA@scale.data[top_genes$gene,]
}
mm <- merge(obj@meta.data, t(hm_data), by = 0)
rownames(mm) <- mm$Row.names
mm$Row.names <- NULL
mm_filt <- mm[, c(clustering, top_genes$gene)]
mm_matrix <- mm_filt %>% group_by_at(clustering) %>% summarise_all(list(mean)) %>% data.frame()
rownames(mm_matrix) <- mm_matrix[[clustering]]
mm_matrix[[clustering]] <- NULL
hm_matrix <- as.matrix(mm_matrix)
hm_matrix <- hm_matrix[clu_order_alp, ]
for (gg in high_genes) {
if (!gg %in% colnames(hm_matrix)) {
colnames(hm_matrix)[grep(gsub("-", ".", gg), colnames(hm_matrix))] <- gg
}
}
colnames(hm_matrix) <- str_split_fixed(colnames(hm_matrix), "[.]", 2)[,1]
gg <- intersect(colnames(hm_matrix), high_genes)
gg_index <- which(colnames(hm_matrix) %in% gg)
labels <- colnames(hm_matrix)[gg_index]
ha_tre <- columnAnnotation(link = anno_mark(at = gg_index, labels = labels,
labels_rot = 45, padding = 0.75,
labels_gp = gpar(fontsize = 18),
link_width = unit(15, "mm"),
side = "bottom"))
rsplit <- data.frame(Cluster = clu_order_alp)
rownames(rsplit) <- clu_order_alp
rsplit$Cluster <- factor(rsplit$Cluster, levels = clu_order_alp)
ha_col <- rowAnnotation(df = rsplit, show_annotation_name = FALSE,
show_legend = FALSE, col = list(Cluster = clu_cols_seurat),
annotation_legend_param = list(Cluster = list(grid_height = unit(.5, "cm"), grid_width = unit(.3, "cm"),
title_gp = gpar(col = "black", fontsize = 16),
labels_gp = gpar(col = "black", fontsize = 16))))
hm = Heatmap(matrix = hm_matrix,
name = "Expr.",
col = col_sc,
cluster_columns = FALSE,
cluster_rows = FALSE,
rect_gp = gpar(col = "grey", lwd = 0),
border = FALSE,
show_row_names = FALSE,
show_column_names = FALSE,
column_names_side = "top",
column_title_gp = gpar(fontsize = 0),
row_split = rsplit,
column_split = rep(0:(nrow(hm_matrix) - 1), each = ntop),
column_gap = unit(4, "mm"),
row_title_gp = gpar(fontsize = 18),
row_title_rot = 0,
left_annotation = ha_col,
bottom_annotation = ha_tre,
)
return(hm)
}
########################
### General Settings ###
########################
# Working dir
wdir <- "squadrito_livertumor2022_scrnaseq"
# Data dir
data_dir <- paste(wdir, "data", sep = "/")
# Results dir
out_dir <- paste(wdir, "results", sep = "/")
dir.create(path = out_dir, showWarnings = F)
# Paper dir
paper_dir <- paste(out_dir, "paper_plots", sep = "/")
dir.create(path = paper_dir, showWarnings = F)
# Full Counts
gz_in_counts <- gzfile(paste(data_dir, "Full_final_DBR_labeled_Und_rem_counts.csv.gz", sep = "/"), "rt")
full_counts <- read.csv(gz_in_counts, row.names = 1)
close(gz_in_counts)
# Full Meta Data
gz_in_md <- gzfile(paste(data_dir, "Full_final_DBR_labeled_Und_rem_metadata.csv.gz", sep = "/"), "rt")
full_md <- read.csv(gz_in_md, row.names = 1)
close(gz_in_md)
Full_obj <- CreateSeuratObject(counts = full_counts, meta.data = full_md)
Full_obj <- NormalizeData(object = Full_obj)
###################
### Full Object ###
###################
## Find markers for 4c and 5a
cluster <- unique(Full_obj@meta.data$Newlabels)
for (i in (1:length(cluster))){
population <- cluster[i]
print(population)
Full_obj <- SetIdent(Full_obj, value = "Newlabels")
und_obj <- subset(Full_obj, idents = population)
und_obj <- SetIdent(und_obj, value = "RNA_Group")
DefaultAssay(object = und_obj) <- "RNA"
tableTK <- FindMarkers(und_obj, ident.1 = "IFN_PR", ident.2 = "CTRL", logfc.threshold = 0)
write.table(tableTK, paste(paper_dir, paste(population, "PRvsCTRL.txt", sep = "_"), sep = "/"), quote = FALSE, sep = "\t")
tableTK <- FindMarkers(und_obj, ident.1 = "IFN_R", ident.2 = "CTRL", logfc.threshold = 0)
write.table(tableTK, paste(paper_dir, paste(population, "RvsCTRL.txt", sep = "_"), sep = "/"), quote = FALSE, sep = "\t")
tableTK <- FindMarkers(und_obj, ident.1 = "IFN_PR", ident.2 = "IFN_R", logfc.threshold = 0)
write.table(tableTK, paste(paper_dir, paste(population, "PRvsR.txt", sep = "_"), sep = "/"), quote = FALSE, sep = "\t")
}
## Find marker for Extended figure 10c
cluster <- unique(Full_obj@meta.data$Newlabels_rough)[!unique(Full_obj@meta.data$Newlabels_rough) %in% unique(Full_obj@meta.data$Newlabels)]
for (i in (1:length(cluster))){
population <- cluster[i]
print(population)
Full_obj <- SetIdent(Full_obj, value = "Newlabels_rough")
und_obj <- subset(Full_obj, idents = population)
und_obj <- SetIdent(und_obj, value = "RNA_Group")
DefaultAssay(object = und_obj) <- "RNA"
tableTK <- FindMarkers(und_obj, ident.1 = "IFN_PR", ident.2 = "CTRL", logfc.threshold = 0)
write.table(tableTK,paste(paper_dir, paste(population, "PRvsCTRL.txt", sep = "_"), sep = "/"), quote = FALSE, sep = "\t")
tableTK <- FindMarkers(und_obj, ident.1 = "IFN_R", ident.2 = "CTRL", logfc.threshold = 0)
write.table(tableTK, paste(paper_dir, paste(population, "RvsCTRL.txt", sep = "_"), sep = "/"), quote = FALSE, sep = "\t")
tableTK <- FindMarkers(und_obj, ident.1 = "IFN_PR", ident.2 = "IFN_R", logfc.threshold = 0)
write.table(tableTK, paste(paper_dir, paste(population, "PRvsR.txt", sep = "_"), sep = "/"), quote = FALSE, sep = "\t")
}
## Find marker for Figure 4f
Full_obj <- SetIdent(Full_obj, value = "Newlabels_detailed")
DefaultAssay(object = Full_obj) <- "RNA"
tableTK <- FindMarkers(Full_obj, ident.1 = "IFNa TAMs", ident.2 = "TAMs", logfc.threshold = 0)
write.table(tableTK, paste(paper_dir, "IFNa_TAMs_vs_TAMs.txt", sep = "/"), quote = FALSE, sep = "\t")
## Function used for gsea calculation (Figure 4c and 5a as well as Extended figure 10c)
miDB_sig <- read.gmt("h.all.v7.1.symbols_mouse.gmt")
comparisons <- c("PRvsCTRL.txt", "RvsCTRL.txt", "PRvsR.txt")
for (i in (1:length(cluster))){
population <- cluster[i]
print(population)
for (j in (1:length(comparisons))){
comp <- comparisons[j]
#read indicated comparison
difG <- read.table(paste(paper_dir, paste(population, comp, sep = "_"), sep = "/"), header = T, sep="\t")
difG <- difG[order(difG$avg_log2FC, decreasing = T),]
difG1 <- difG[,2]
names(difG1) <- rownames(difG)
result <- fgsea(pathways = miDB_sig, stats = difG1, minSize = 7, maxSize = 500)
result1 <- as.data.frame(result[,])[, c(1,2,3,4,5,6,7)]
result1 <- result1[order(result1$NES, decreasing = T),]
write.table(result1, paste(paper_dir, paste(population, comp, sep = "_"), sep = "/"), row.names = T, quote = FALSE, sep = "\t")
}
}
## Figure 4b ##
# DimPlot(Full_obj, reduction = "umap", group.by = "Newlabels", label = T, pt.size = 0.8)
# Extended Figure 8a - underlying data
tableTK <- table(data.frame(a = Full_obj@meta.data$orig.ident, b = Full_obj@meta.data$Newlabels))
write.table(tableTK, paste(paper_dir, paste(obj, "NewLabels", "cellcomposition.txt", sep = "_"), sep = "/"), quote = FALSE, sep = "\t")
## Extended Figure 8b ##
# FeaturePlot(Full_obj, reduction = "umap",features = c("Cd79a", "Trbc2", "Csf1r", "Cd33", "Tek", "Epcam"), ncol = 3)
## Extended Figure 8c ##
tableFullobj <- read.xlsx(xlsxFile = paste(data_dir, "scRNAseq_DiFgenes_Fullobj_FC0.25.xlsx", sep = "/"))
Full_obj@misc[["markers"]][["Newlabels"]] <- tableFullobj
markers_used <- tableFullobj %>% group_by(cluster) %>% top_n(20, avg_log2FC)
hm <- doSCHeatmapMeanCorrect(obj = Full_obj,
clustering = "Newlabels",
ntop = 20,
num_color = 4,
clu_order = c("LSECs", "Neutrophils", "APCs", "B cells", "T and NK cells", "Erythroblasts", "B1a-like",
"Cancer cells", "Mast cells", "Hepatocytes"),
high_genes = c("Nkg7", "Cd3e", "Cd79a", "Cd79b", "Ebf1","Iglc2", "Lyz2", "Fcer1g", "Csf1r", "S100a8", "Ngp", "Lcn2",
"Ccl3", "Hdc", "Osm", "Hbb-bs", "Hba-a1", "Alas2","Epcam", "Cldn7", "Cldn4", "Fabp1", "Apoc1",
"Alb", "Jchain", "Derl3", "Ighg2b", "Ehd3", "Mrc1"))#, duplicated_labs=T)
pdf(paste(paper_dir, "scRNAseq_Heatmap_Fullobj.pdf", sep = "/"), width = 30, height = 10)
draw(hm)
dev.off()
## Extended Figure 8d ##
# DimPlot(Full_obj, reduction = "umap", group.by = "Newlabels", label = T, split.by = "RNA_Group",pt.size = 0.8)
##################
### APC Object ###
##################
# APC Counts
gz_in_counts <- gzfile(paste(data_dir, "APCs_cells_DBR_labeled_Und_rem_counts.csv.gz", sep = "/"), "rt")
apc_counts <- read.csv(gz_in_counts, row.names = 1)
close(gz_in_counts)
# APC Meta Data
gz_in_md <- gzfile(paste(data_dir, "APCs_cells_DBR_labeled_Und_rem_metadata.csv.gz", sep = "/"), "rt")
apc_md <- read.csv(gz_in_md, row.names = 1)
close(gz_in_md)
APC_obj <- CreateSeuratObject(counts = apc_counts, meta.data = apc_md)
APC_obj <- NormalizeData(object = APC_obj)
## Figure 4d ##
# DimPlot(APC_obj, reduction = "umap", group.by = "Newlabels_detailed", label = T, split.by = "RNA_Group",pt.size = 1.2)
## Figure 4e - underlying data ##
tableTK <- table(data.frame(a = APC_obj@meta.data$orig.ident, b = APC_obj@meta.data$Newlabels_detailed))
write.table(tableTK, paste(paper_dir, paste("APCs", "NewLabels_detailed", "cellcomposition.txt", sep = "_"), sep = "/"), quote = FALSE, sep = "\t")
## Extended Figure 9a ##
tableAPCobj <- read.xlsx(xlsxFile = paste(data_dir, "scRNAseq_DiFgenes_APCs_FC0.25.xlsx", sep = "/"))
APC_obj@misc[["markers"]][["Newlabels_detailed"]] <- tableAPCobj
markers_used <- tableAPCobj %>% group_by(cluster) %>% top_n(20, avg_log2FC)
hm <- doSCHeatmapMeanCorrect(obj = APC_obj,
clustering = "Newlabels_detailed",
ntop = 20,
num_color = 4,
clu_order = c("Monocytes", "TAMs", "IFNa TAMs", "KCs", "Mo DCs", "CDPs", "pDCs",
"CD8 cDC1", "Ccr7 DCs", "pre DCs"),
high_genes = c("Chil3", "Ifi204","Ifi27l2a", "Arg1", "Cscl10", "Cd300e", "Eno3", "Adgre4", "Fn1",
"Vcan", "H2-Eb1", "H2-Ab1", "Cd209a", "Ciita", "C1qa", "C1qb",
"Apoe", "Il1b", "Stmn1", "Mki67", "Sox4", "Bcl11a", "Crip1",
"Ly6d", "Siglech", "Ccr9", "Xcr1", "Clec91", "Fscn1", "Ccr7", "Cd63"))#, duplicated_labs=T)
pdf(paste(paper_dir, "scRNAseq_Heatmap_APCs.pdf", sep = "/"), width = 30, height = 10)
draw(hm)
dev.off()
## Extended Figure 9b ##
APC_obj$Newlabels_detailed_order <- rep("Undefined", length(APC_obj@meta.data$orig.ident))
APC_obj$Newlabels_detailed_order[APC_obj$Newlabels_detailed=="KCs"]<-"A KCs"
APC_obj$Newlabels_detailed_order[APC_obj$Newlabels_detailed=="IFNa TAMs"]<-"B IFNa TAMs"
APC_obj$Newlabels_detailed_order[APC_obj$Newlabels_detailed=="TAMs"]<-"C TAMs"
APC_obj$Newlabels_detailed_order[APC_obj$Newlabels_detailed=="Monocytes"]<-"D Monocytes"
APC_obj$Newlabels_detailed_order[APC_obj$Newlabels_detailed=="Mo DCs"]<-"E Mo DCs"
APC_obj$Newlabels_detailed_order[APC_obj$Newlabels_detailed=="CDPs"]<-"F CDPs"
APC_obj$Newlabels_detailed_order[APC_obj$Newlabels_detailed=="Ccr7 DCs"]<-"G Ccr7 DCs"
APC_obj$Newlabels_detailed_order[APC_obj$Newlabels_detailed=="pDCs"]<-"H pDCs"
APC_obj$Newlabels_detailed_order[APC_obj$Newlabels_detailed=="CD8 cDC1"]<-"I cDC1"
APC_obj$Newlabels_detailed_order[APC_obj$Newlabels_detailed=="pre DCs"]<-"J pre DCs"
APC_obj <- SetIdent(APC_obj, value = "Newlabels_detailed_order")
featuresDC <- c("Mki67", "Xcr1", "Ccr9", "Siglech","Ccr7", "Itgax", "Ciita","Cd274")
featuresISG <- c("Irf7", "Oas1a", "Ifit1", "Ly6c2","Fcgr1","Slfn4")
featuresTAM <- c("Cx3cr1", "Itgam","Lyz2","Cebpb","Mmp8","C3","Chil3","Fn1")
featuresKC <- c("Adgre1","Mrc1","C1qa", "C1qb","Apoe","Cd81")
featuresAPC <- c(featuresDC, featuresTAM, featuresISG, featuresKC)
DotPlot(APC_obj, features = featuresAPC, group.by = "Newlabels_detailed_order", dot.scale = 14)
##################
### TNK Object ###
##################
# TNK Counts
gz_in_counts <- gzfile(paste(data_dir, "T_NK_cells_DBR_labeled_Und_rem_counts.csv.gz", sep = "/"), "rt")
tnk_counts <- read.csv(gz_in_counts, row.names = 1)
close(gz_in_counts)
# TNK Meta Data
gz_in_md <- gzfile(paste(data_dir, "T_NK_cells_DBR_labeled_Und_rem_metadata.csv.gz", sep = "/"), "rt")
tnk_md <- read.csv(gz_in_md, row.names = 1)
close(gz_in_md)
TNK_obj <- CreateSeuratObject(counts = tnk_counts, meta.data = tnk_md)
TNK_obj <- NormalizeData(object = TNK_obj)
## Figure 5b ##
# DimPlot(TNK_obj, reduction = "umap", group.by = "Newlabels_detailed", label = T, split.by = "RNA_Group", pt.size = 0.8)
## Figure 5c - underlying data ##
tableTK <- table(data.frame(a = TNK_obj@meta.data$orig.ident, b = TNK_obj@meta.data$Newlabels_detailed))
write.table(tableTK, paste(paper_dir, paste("TNK", "NewLabels_detailed", "cellcomposition.txt", sep = "_"), sep = "/"), quote = FALSE, sep = "\t")
## Figure 5d ##
TNK_obj <- SetIdent(TNK_obj, value = "Newlabels_rough")
CD8_obj <- subset(TNK_obj, idents = c("CD8 T cells"))
DotPlot(CD8_obj, features = c("Pdcd1", "Lag3", "Havcr2", "Ctla4", "Eomes", "Tox", "Ccl3", "Ccl4", "Casp3",
"Tcf7", "Tbx21", "Cd69", "Itgae", "Itga1", "Cd7", "Il2", "Tnf", "Il12a"),
group.by = "RNA_Group", dot.scale = 15)
## Extended Figure 10a ##
tableTNKobj <- read.xlsx(xlsxFile = paste(data_dir, "scRNAseq_DiFgenes_T&NKcells_FC0.25.xlsx", sep = "/"))
TNK_obj@misc$markers[["Newlabels_detailed"]] <- tableTNKobj
markers_used <- tableTNKobj %>% group_by(cluster) %>% top_n(20, avg_log2FC)
hm <- doSCHeatmapMeanCorrect(obj = TNK_obj,
clustering = "Newlabels_detailed",
ntop = 20,
num_color = 4,
clu_order = c("gd T cells", "T prol cells", "CD8 Tm cells", "CD4 Tr1-like cells", "CD8 Tex cells",
"CD8 Teff1 cells", "CD8 Teff2 cells", "CD8 Tsm-like cells", "CD4 Tsm-like cells",
"CD4 Teff-like cells", "CD4 Tex cells", "CD4 Treg cells", "ILCs", "MAIT", "iNKT",
"gd NKT", "NK", "Gzma NKT"),
high_genes = c("Foxp3", "Ctla4", "Cd8a", "Lag3", "Pdcd1", "Gzmk", "Maf", "Eomes", "Xcl1", "Klrc1",
"Stmn1", "Mki67", "Lef1", "Ccr7", "Tcf7", "Il7r", "Cd40lg", "Itgb1", "Rora", "Trdc",
"Tcrg-C1", "Klrc1", "Xcl1", "Fcer1g", "Trdc", "Gzma", "Ncr1", "Cx3cr1", "Hif1a",
"Sell", "Tnf", "Gzmb"))#, duplicated_labs=T)
pdf(paste(paper_dir, "scRNAseq_Heatmap_TNKcells.pdf", sep = "/"), width = 30, height = 10)
draw(hm)
dev.off()
## Extended Figure 10b ##
TNK_obj <- SetIdent(TNK_obj, value = "Newlabels_detailed")
DotPlot(TNK_obj, features = c("Cd4", "Gzmk", "Lag3", "Pdcd1", "Eomes", "Ahr", "Maf", "Prdm1","Il10ra", "Il10", "Ctla4", "Il7r", "Cd40lg",
"Cd8a", "Cd69","Itgae", "Itga1", "Cd7", "Rora", "Cxcr3", "Tnf", "Il2"),group.by = "Newlabels_detailed",
dot.scale = 15)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment