Commit c42e67ff authored by Stefano Beretta's avatar Stefano Beretta
Browse files

Added scripts and data.

parent 3ee73ae5
.DS_Store
results/
This diff is collapsed.
This diff is collapsed.
library(Seurat)
library(ggplot2)
library(patchwork)
library(dplyr)
library(cowplot)
library(OLIN)
########################
### General Settings ###
########################
# Working dir
wdir <- "~/Downloads/TIGET/Squadrito/scSquadrito/squadrito_livertumor2022_spatial"
# Raw data dir (download from GEO)
geo_dir <- "~/Downloads/TIGET/Squadrito/scSquadrito/GEO_data"
# Data dir
data_dir <- paste(wdir, "data", sep = "/")
# Results dir
out_dir <- paste(wdir, "results", sep = "/")
dir.create(path = out_dir, showWarnings = F)
analysis_params <- list(
min.feature = 500, # min nFeature_RNA
max.pc.mito = 20, # max percent.mt
mito.prefix = "^mt-", # mito prefix
)
# Load Samples
samples <- c("Sample3", "Sample4", "Sample7", "Sample10", "Sample11", "Sample14", "Sample19", "Sample22")
treatmentGroups <- c("Ctrl", "nonRes", "Ctrl", "parRes", "parRes", "nonRes", "Ctrl", "parRes")
reference.list <- c()
for (i in 1:length(samples)) {
sample <- samples[i]
print(sample)
obj_img <- Read10X_Image(image.dir = paste(geo_dir, paste0(sample, "_spatial"), sep = "/"))
obj <- Load10X_Spatial(data.dir = geo_dir, filename = paste0(sample, "_filtered_feature_bc_matrix.h5"), image = obj_img, slice = paste0(sample, "img"))
# Quality check and Filter
obj <- PercentageFeatureSet(obj, analysis_params$mito.prefix, col.name = "percent_mito")
obj <- PercentageFeatureSet(obj, "^Hb.*-", col.name = "percent_hb")
qcheck_vplot <- VlnPlot(object = obj, features = c("nCount_Spatial", "nFeature_Spatial", "percent_mito", "percent_hb"), pt.size = 0.1, ncol = 4) + NoLegend()
qcheck_splot <- SpatialFeaturePlot(object = obj, features = c("nCount_Spatial", "nFeature_Spatial", "percent_mito", "percent_hb"), ncol = 4)
png(filename = paste(out_dir, paste0(sample, "_qCheck_prefilter.png"), sep = "/"), width = 1500, height = 1000, res = 100)
print(qcheck_vplot / qcheck_splot)
dev.off()
obj = obj[, obj$nFeature_Spatial > analysis_params$min.feature & obj$percent_mito < analysis_params$max.pc.mito]
qcheck_vplot <- VlnPlot(object = obj, features = c("nCount_Spatial", "nFeature_Spatial", "percent_mito", "percent_hb"), pt.size = 0.1, ncol = 4) + NoLegend()
qcheck_splot <- SpatialFeaturePlot(object = obj, features = c("nCount_Spatial", "nFeature_Spatial", "percent_mito", "percent_hb"), ncol = 4)
png(filename = paste(out_dir, paste0(sample, "_qCheck_postfilter.png"), sep = "/"), width = 1500, height = 1000, res = 100)
print(qcheck_vplot / qcheck_splot)
dev.off()
# Most Expressed Genes (to be checked)
C = obj@assays$Spatial@counts
C@x = C@x/rep.int(Matrix::colSums(C), diff(C@p))
most_expressed <- order(Matrix::rowSums(C), decreasing = T)[30:1]
png(filename = paste(out_dir, paste0(sample, "_top_expressed_genes.png"), sep = "/"), width = 1500, height = 1000, res = 100)
par(mar=c(5,8,4,2))
boxplot(as.matrix(Matrix::t(C[most_expressed, ])), cex = 0.1, las = 1, xlab = "% total count per cell",
col = (scales::hue_pal())(20)[20:1], horizontal = TRUE)
dev.off()
# rerun normalization to store sctransform residuals for all genes
obj <- SCTransform(object = obj, assay = "Spatial", return.only.var.genes = FALSE, verbose = FALSE)
# also run standard log normalization for comparison
obj <- NormalizeData(obj, verbose = FALSE, assay = "Spatial")
# Computes the correlation of the log normalized data and sctransform residuals with the number of UMIs
obj <- GroupCorrelation(object = obj, group.assay = "Spatial", assay = "Spatial", slot = "data", do.plot = FALSE)
obj <- GroupCorrelation(object = obj, group.assay = "Spatial", assay = "SCT", slot = "scale.data", do.plot = FALSE)
p1 <- GroupCorrelationPlot(object = obj, assay = "Spatial", cor = "nCount_Spatial_cor") + ggtitle("Log Normalization") +
theme(plot.title = element_text(hjust = 0.5))
p2 <- GroupCorrelationPlot(object = obj, assay = "SCT", cor = "nCount_Spatial_cor") + ggtitle("SCTransform Normalization") +
theme(plot.title = element_text(hjust = 0.5))
png(filename = paste(out_dir, paste0(sample, "_LogNorm_vs_SCTransf.png"), sep = "/"), width = 1200, height = 1000, res = 100)
print(p1 + p2)
dev.off()
# Add info and Prepare for Integration
obj@meta.data$orig.ident <- sample
obj@meta.data$TreatGroups <- treatmentGroups[i]
DefaultAssay(obj) <- "SCT"
obj <- NormalizeData(obj, verbose = FALSE)
obj <- FindVariableFeatures(obj, selection.method = "vst", nfeatures = 500, verbose = FALSE)
reference.list <- c(reference.list, obj)
}
# Find Anchors and Integrate
crc.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30)
crc.integrated <- IntegrateData(anchorset = crc.anchors, dims = 1:30)
# Analyze Integrated Object
DefaultAssay(crc.integrated) <- "integrated"
plot1 <- FeatureScatter(crc.integrated, feature1 = 'nCount_SCT', feature2 = "nCount_Spatial", group.by = "orig.ident")
plot2 <- FeatureScatter(crc.integrated, feature1 = "nFeature_SCT", feature2 = "nCount_Spatial", group.by = "orig.ident")
plot3 <- FeatureScatter(crc.integrated, feature1 = "percent_mito", feature2 = "nFeature_Spatial", group.by = "orig.ident")
CombinePlots(plots = list(plot1, plot2,plot3))
# Scale data
crc.integrated <- ScaleData(crc.integrated, verbose = FALSE)
crc.integrated <- RunPCA(crc.integrated, npcs = 70, verbose = FALSE)
ElbowPlot(crc.integrated, ndims = 70)
DefaultAssay(crc.integrated) <- "integrated"
# UMAP and Clustering
crc.integrated <- RunUMAP(crc.integrated, dims = 1:25, seed.use = 123)
crc.integrated <- FindNeighbors(crc.integrated, reduction = "umap", dims = 1:2, force.recalc = T)
crc.integrated <- FindClusters(crc.integrated, resolution = 0.1)
Idents(crc.integrated) <- "integrated_snn_res.0.1"
crc.integrated <- SCTransform(crc.integrated, assay = "Spatial", new.assay.name = "SCTintegrated")
# Merge Cluster that are redundant or poorly represented
crc.integrated@meta.data$integrated_FinalClusters <- crc.integrated@meta.data$integrated_snn_res.0.1
crc.integrated@meta.data$integrated_FinalClusters[crc.integrated@meta.data$integrated_FinalClusters == 8] <- 2
# Tmp save
saveRDS(crc.integrated, file = paste(out_dir, "crc.integrated.rds", sep = "/"))
# Loop on Samples
for (sample in samples){
print(sample)
# SUBSET DATA AND IDENTIFY CLUSTER TO USE ("integrated_FinalClusters")
crc.integrated <- SetIdent(object = crc.integrated, value = "orig.ident")
sub1 <- subset(crc.integrated, idents = sample)
sub1 <- SetIdent(object = sub1, value = "integrated_FinalClusters")
# EXTRACT COORDINATES
Coord <- crc.integrated@images[[paste0(sample, "img")]]@coordinates
ObjClusterofInterest <- crc.integrated@meta.data$integrated_FinalClusters[crc.integrated@meta.data$orig.ident == sample]
# TUMOR CLUSTERS
Coord$ift <- (ObjClusterofInterest == 1 | ObjClusterofInterest == 6) + 0
# CREATING MATRIX WITH ALL POSITIONS AND 1 WHEN TUMOR IS THERE
maxrow <- max(Coord$row)
maxcol <- max(Coord$col)
msub1 <- matrix(rep(NA, maxrow * maxcol), ncol = maxcol)
msub1 <- as.data.frame(msub1)
for (i in c(1:length(Coord$ift))) {
msub1[Coord$row[i], Coord$col[i]] <- Coord$ift[i]
}
# CREATING MATRIX WITH ALL POSITIONS AND 1 WHEN LIVER IS THERE
msub1liv <- matrix(rep(NA, maxrow * maxcol), ncol = maxcol)
msub1liv <- as.data.frame(msub1liv)
for (i in c(1:length(Coord$ift))){
msub1liv[Coord$row[i], Coord$col[i]] <- (-Coord$ift[i]+1)
}
# CREATING MATRIX WITH NAMES TO LATER FIT THE SEURAT OBJ
namesTokeep <- matrix(rep(NA, maxrow * maxcol), ncol = maxcol)
namesTokeep <- as.data.frame(namesTokeep)
for (i in c(1:length(Coord$ift))){
namesTokeep[Coord$row[i], Coord$col[i]] <- rownames(Coord)[i]
}
# COMPUTING moving average on tumors
masub1 <- ma.matrix(as.matrix(msub1), av = "mean", delta = 2, edgeNA = FALSE )
masub1 <- ma.matrix(as.matrix(msub1), av = "mean", delta = 3, edgeNA = FALSE ) + (masub1*2)
indexes1 <- msub1 # 1 is tumor 0 is liver na is empty
masub1 <- masub1 * indexes1
# COMPUTING moving average on liver
masub1liv <- ma.matrix(as.matrix(msub1liv), av = "mean", delta = 2, edgeNA = FALSE)
masub1liv <- ma.matrix(as.matrix(msub1liv), av = "mean", delta = 3, edgeNA = FALSE) * 0.1 + (masub1liv * 2)
indexes1 <- msub1
masub1liv <- masub1liv * (-1*(indexes1-1))
# Make final data.frame with scores, and cell IDS
# Scores of tumor
A1 = 2.96
A2 = 2.7
A3 = 2.3
masub1[masub1 > A1] <- (-4)
masub1[masub1 <= A1 & masub1 > A2] <- (-3)
masub1[masub1 <= A2 & masub1 > A3] <- (-2)
masub1[masub1 <= A3 & masub1 > 0] <- (-1)
# Score of liver
A1 = 2.095
A2 = 1.91
A3 = 1.7
masub1liv[masub1liv <= A3 & masub1liv > 0] <- 0
masub1liv[masub1liv <= A2 & masub1liv > A3] <- 1
masub1liv[masub1liv <= A1 & masub1liv > A2] <- 2
masub1liv[masub1liv > A1] <- 3
# Merging df finalScores
finalScores <- masub1 + masub1liv
finalScores[finalScores == (-4)] <- "Zone_A"
finalScores[finalScores == (-3)] <- "Zone_B"
finalScores[finalScores == (-2)] <- "Zone_C"
finalScores[finalScores == (-1)] <- "Zone_D"
finalScores[finalScores == 0] <- "Zone_E"
finalScores[finalScores == 1] <- "Zone_F"
finalScores[finalScores == 2] <- "Zone_G"
finalScores[finalScores == 3] <- "Zone_H"
finalScores[finalScores == "NaN"] <- NA
# Making the table with SEURAT iDS
FinalTable <- data.frame(SeuratID = as.vector(as.matrix(namesTokeep)),
datasub1 = as.vector(as.matrix(finalScores)),
drow = rep(1:nrow(masub1), ncol(masub1)),
dcol = rep(1:ncol(masub1), nrow(masub1))[order(rep(1:ncol(masub1), nrow(masub1)))]
)
FinalTable <- na.omit(FinalTable)
# Merging in our seurat obj WARNING ix = "obj3"
SeuratPos <- names(crc.integrated@active.ident)
FinalTable <- FinalTable[order(FinalTable$SeuratID),]
if(!"Zones" %in% colnames(crc.integrated@meta.data)){
crc.integrated@meta.data$Zones <- rep("Zone_A", length(SeuratPos))
}
crc.integrated@meta.data$Zones[crc.integrated@meta.data$orig.ident == sample] <- FinalTable$datasub1
objs <- SplitObject(crc.integrated, split.by = "orig.ident")
pdf(paste(out_dir, paste0(sample, ".zones.pdf"), sep = "/"), heigh = 8, width = 8)
print(SpatialDimPlot(objs[[sample]], group.by = "Zones", images = paste0(sample, "img"), crop = T))
dev.off()
}
# Export Meta Data
full_md <- crc.integrated@meta.data
gz_out_md <- gzfile(paste(data_dir, "crc.integrated_metadata.csv.gz", sep = "/"), "w")
write.csv(x = full_md, gz_out_md)
close(gz_out_md)
# Save final object
saveRDS(crc.integrated, file = paste(out_dir, "crc.integrated.rds", sep = "/"))
library(Seurat)
library(fgsea)
library(gplots)
########################
### General Settings ###
########################
# Working dir
wdir <- "~/Downloads/TIGET/Squadrito/scSquadrito/squadrito_livertumor2022_spatial"
# Data dir
data_dir <- paste(wdir, "data", sep = "/")
# Results dir
out_dir <- paste(wdir, "results", sep = "/")
dir.create(path = out_dir, showWarnings = F)
# Read Obj
crc.integrated <- readRDS(file = paste(out_dir, "crc.integrated.rds", sep = "/"))
## Create Table Visium_DIFGenes_Groupe & SpatialCompartments
## All groups
crc.integrated@meta.data$Zones_unique <- paste(crc.integrated@meta.data$TreatGroups, crc.integrated@meta.data$Zones, sep = "_")
nClus <- unique(crc.integrated@meta.data$Zones_unique)
DefaultAssay(crc.integrated) <- "SCTintegrated"
crc.integrated <- SetIdent(object = crc.integrated, value = "Zones_unique")
cluster0degAll <- NULL
for (i in nClus) {
print(i)
cluster0genes <- FindMarkers(crc.integrated, ident.1 = i, min.pct = 0, only.pos = FALSE, min.cells.group = 1,
test.use = "wilcox", return.thresh = 1, logfc.threshold = 1e-11)
cluster0genes$GeneID <- rownames(cluster0genes)
cluster0deg <- cluster0genes
cluster0deg$Cluster <- i
cluster0degAll <- rbind(cluster0degAll, cluster0deg)
}
write.table(x = cluster0degAll, file = paste(data_dir, "Zones_unique.integrated.txt", sep = "/"),
quote = FALSE, row.names = TRUE, sep = "\t")
## Figure 4a
## Load gsea signature file
miDB_sig2 <- readRDS(paste(data_dir, "miDB_sig3.rds", sep = "/")) #Ctrl
# Loop on Zones unique
Clusters <- unique(cluster0degAll[,7])
TopResultAllCluster <- NULL
for (i in Clusters) {
cat(paste("Calculating ", i, "...", sep = ""))
CLusterTD <- i
df2 <- cluster0degAll[cluster0degAll[,7] == CLusterTD, 2]
names(df2) <- cluster0degAll[cluster0degAll[,7] == CLusterTD, 6]
df2 <- df2[order(df2)]
test1 <- fgsea(pathways = miDB_sig2, stats = df2, minSize = 7, maxSize = 500)
test1 <- test1[rev(order(NES))]
TopResult <- as.data.frame(test1[,])[, c(1,2,3,4,5,6,7)]
TopResult$Cluster <- i
TopResultAllCluster <- rbind(TopResultAllCluster, TopResult)
cat(paste("done!", "\n", sep = ""))
}
write.table(x = TopResultAllCluster, file = paste(data_dir, "GSEA_Zones_unique_integrated.res01.txt", sep = "/"),
quote = FALSE, row.names = TRUE, sep = "\t")
## Heatmaps ##
## Order NES values in a unique table having colum as clusters
## and rows as pathways, if adj.pval > 0.05 NA is introduced
# Create colum with groups
TopResultAllCluster$Group <- stringr::str_split_fixed(TopResultAllCluster$Cluster, "_", 2)[,1]
# Create colum with Zones
TopResultAllCluster$Zones <- stringr::str_split_fixed(TopResultAllCluster$Cluster, "_", 2)[,2]
#Preparing Heatmap parameters
orderC <- unique(TopResultAllCluster$Zones)[order(unique(TopResultAllCluster$Zones))]
SideColors <- rep(c(rep("darkred",3), rep("red",1), rep("darkgreen",3), rep("green",1)), 3)
MLScol <- colorRampPalette(c("darkblue","blue", "grey", "orange","red"), space = "rgb")
# Funtion - assigns the first colum as name
newName<-function(dfCNES,i) {
rownames(dfCNES) <- dfCNES[, i]
dfCNES <- dfCNES[, -(i)]
return(dfCNES)
}
#create tables with enriched terms
for (ix in unique(TopResultAllCluster$Group)){
test <- TopResultAllCluster[TopResultAllCluster$Group == ix,]
test <- test[!is.na(test$padj),]
test <- test[test$padj < 0.05,]
AllCluster <- unique(test$Zones)
orgerGO <- unique(test$pathway)
dfCNES <- data.frame(pathway = orgerGO[order(orgerGO)])
for (i in 1:length(AllCluster)){
dfC1 <- test[test$Zones == AllCluster[i],]
dfC1 <- data.frame(dfC1[order(dfC1$pathway), c(1, 6)])
colnames(dfC1) <- c("pathway", AllCluster[i])
dfCNES <- merge(dfCNES, dfC1, by = 1, all = T)
}
dfCNES <- newName(dfCNES, 1)
dfCNES <- dfCNES[rowSums(is.na(dfCNES)+0) < 8,]
dfCNES[is.na(dfCNES)] <- 0
finaldf2 <- as.matrix(dfCNES)
finaldf2 <- finaldf2[, orderC]
nameF <- paste(ix, "uniquezones.txt", sep = ".")
assign(nameF, finaldf2)
write.table(x = finaldf2, file = paste(out_dir, nameF, sep = "/"), quote = FALSE, row.names = T, sep = "\t")
}
# Select to plot manually curated
Selected.Terms <- c("GOBP_RESPONSE_TO_TYPE_I_INTERFERON",
"GOBP_RESPONSE_TO_INTERFERON_GAMMA",
"GOBP_RESPONSE_TO_VIRUS",
"GOBP_POSITIVE_REGULATION_OF_CYTOKINE_PRODUCTION",
"GOBP_T_CELL_ACTIVATION",
"GOBP_ADAPTIVE_IMMUNE_RESPONSE",
"GOBP_REGULATION_OF_IMMUNE_EFFECTOR_PROCESS",
"IL10_RO",
"HALLMARK_ANGIOGENESIS",
"HALLMARK_P53_PATHWAY",
"HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION",
"HALLMARK_ADIPOGENESIS",
"HALLMARK_PEROXISOME",
"GOBP_SHORT_CHAIN_FATTY_ACID_METABOLIC_PROCESS",
"GOBP_LIPID_OXIDATION",
"HALLMARK_BILE_ACID_METABOLISM")
# Loop to create all Heatmaps
dfNames <- paste0(unique(TopResultAllCluster$Group),".uniquezones.txt")
ix3 <- matrix(nrow = length(Selected.Terms), ncol = 0)
row.names(ix3) <- Selected.Terms
for (i in dfNames){
ix <- read.table(paste(out_dir, i, sep = "/"), header = T, sep = "\t")
ix <- ix[rownames(ix) %in% Selected.Terms,]
diffG <- setdiff(Selected.Terms,rownames(ix))
diffM <- matrix(rep(0, 8*length(diffG)), ncol = 8)
rownames(diffM) <- diffG
colnames(diffM) <- colnames(ix)
ix2 <- rbind(ix, diffM)
ix2 <- ix2[Selected.Terms,]
colnames(ix2) <- paste(sub(".uniquezones.txt", "", i), colnames(ix2), sep = ".")
ix3 <- cbind(ix3, ix2)
}
ix3 <- as.matrix(ix3)
pdf(paste(out_dir, "Heatmap.uniquezones.pdf", sep = "/"), heigh = 6, width = 8)
heatmap.2(ix3, margins = c(5, 12), col = MLScol, cexCol = 0.7, cexRow = 0.5, main = i, scale = "none", Rowv = F, Colv = F, ColSideColors = SideColors)
legend("topright", legend = c("Inner tumor", "Border tumor", "Peritumor", "Liver"), col = unique(SideColors), lty = 1, lwd = 5, cex = .7)
dev.off()
## Extended Figure 7c left
DimPlot(crc.integrated, reduction = "umap", group.by = "integrated_FinalClusters", label = T, pt.size = 2)
## Extended Figure 7c right
SpatialDimPlot(crc.integrated, images = "Sample4img", group.by = "integrated_FinalClusters", pt.size.factor = 2.1)
## Extended Figure 7d
DotPlot(crc.integrated, features = c("Epcam", "Cdh1", "Cldn7", "Actb", "S100a6", "Tmsb10", "Timp1",
"Bgn", "Col3a1", "Spp1", "Alb", "Fabp1", "Apob", "Car3"),
group.by = "integrated_FinalClusters", dot.scale = 15)
## Extended Figure 7e
SpatialDimPlot(crc.integrated, images = "Sample3img", group.by = "Zones", pt.size.factor = 3.5)
SpatialDimPlot(crc.integrated, images = "Sample4img", group.by = "Zones", pt.size.factor = 2.1)
SpatialDimPlot(crc.integrated, images = "Sample7img", group.by = "Zones", pt.size.factor = 2.1)
SpatialDimPlot(crc.integrated, images = "Sample10img", group.by = "Zones", pt.size.factor = 2.5)
SpatialDimPlot(crc.integrated, images = "Sample11img", group.by = "Zones", pt.size.factor = 2.1)
SpatialDimPlot(crc.integrated, images = "Sample14img", group.by = "Zones", pt.size.factor = 2.1)
SpatialDimPlot(crc.integrated, images = "Sample19img", group.by = "Zones", pt.size.factor = 2.1)
SpatialDimPlot(crc.integrated, images = "Sample22img", group.by = "Zones", pt.size.factor = 2.1)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment