Commit c42e67ff authored by Stefano Beretta's avatar Stefano Beretta
Browse files

Added scripts and data.

parent 3ee73ae5
.DS_Store
results/
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
library(Seurat)
library(ggplot2)
library(patchwork)
library(dplyr)
library(cowplot)
library(OLIN)
########################
### General Settings ###
########################
# Working dir
wdir <- "~/Downloads/TIGET/Squadrito/scSquadrito/squadrito_livertumor2022_spatial"
# Raw data dir (download from GEO)
geo_dir <- "~/Downloads/TIGET/Squadrito/scSquadrito/GEO_data"
# Data dir
data_dir <- paste(wdir, "data", sep = "/")
# Results dir
out_dir <- paste(wdir, "results", sep = "/")
dir.create(path = out_dir, showWarnings = F)
analysis_params <- list(
min.feature = 500, # min nFeature_RNA
max.pc.mito = 20, # max percent.mt
mito.prefix = "^mt-", # mito prefix
)
# Load Samples
samples <- c("Sample3", "Sample4", "Sample7", "Sample10", "Sample11", "Sample14", "Sample19", "Sample22")
treatmentGroups <- c("Ctrl", "nonRes", "Ctrl", "parRes", "parRes", "nonRes", "Ctrl", "parRes")
reference.list <- c()
for (i in 1:length(samples)) {
sample <- samples[i]
print(sample)
obj_img <- Read10X_Image(image.dir = paste(geo_dir, paste0(sample, "_spatial"), sep = "/"))
obj <- Load10X_Spatial(data.dir = geo_dir, filename = paste0(sample, "_filtered_feature_bc_matrix.h5"), image = obj_img, slice = paste0(sample, "img"))
# Quality check and Filter
obj <- PercentageFeatureSet(obj, analysis_params$mito.prefix, col.name = "percent_mito")
obj <- PercentageFeatureSet(obj, "^Hb.*-", col.name = "percent_hb")
qcheck_vplot <- VlnPlot(object = obj, features = c("nCount_Spatial", "nFeature_Spatial", "percent_mito", "percent_hb"), pt.size = 0.1, ncol = 4) + NoLegend()
qcheck_splot <- SpatialFeaturePlot(object = obj, features = c("nCount_Spatial", "nFeature_Spatial", "percent_mito", "percent_hb"), ncol = 4)
png(filename = paste(out_dir, paste0(sample, "_qCheck_prefilter.png"), sep = "/"), width = 1500, height = 1000, res = 100)
print(qcheck_vplot / qcheck_splot)
dev.off()
obj = obj[, obj$nFeature_Spatial > analysis_params$min.feature & obj$percent_mito < analysis_params$max.pc.mito]
qcheck_vplot <- VlnPlot(object = obj, features = c("nCount_Spatial", "nFeature_Spatial", "percent_mito", "percent_hb"), pt.size = 0.1, ncol = 4) + NoLegend()
qcheck_splot <- SpatialFeaturePlot(object = obj, features = c("nCount_Spatial", "nFeature_Spatial", "percent_mito", "percent_hb"), ncol = 4)
png(filename = paste(out_dir, paste0(sample, "_qCheck_postfilter.png"), sep = "/"), width = 1500, height = 1000, res = 100)
print(qcheck_vplot / qcheck_splot)
dev.off()
# Most Expressed Genes (to be checked)
C = obj@assays$Spatial@counts
C@x = C@x/rep.int(Matrix::colSums(C), diff(C@p))
most_expressed <- order(Matrix::rowSums(C), decreasing = T)[30:1]
png(filename = paste(out_dir, paste0(sample, "_top_expressed_genes.png"), sep = "/"), width = 1500, height = 1000, res = 100)
par(mar=c(5,8,4,2))
boxplot(as.matrix(Matrix::t(C[most_expressed, ])), cex = 0.1, las = 1, xlab = "% total count per cell",
col = (scales::hue_pal())(20)[20:1], horizontal = TRUE)
dev.off()
# rerun normalization to store sctransform residuals for all genes
obj <- SCTransform(object = obj, assay = "Spatial", return.only.var.genes = FALSE, verbose = FALSE)
# also run standard log normalization for comparison
obj <- NormalizeData(obj, verbose = FALSE, assay = "Spatial")
# Computes the correlation of the log normalized data and sctransform residuals with the number of UMIs
obj <- GroupCorrelation(object = obj, group.assay = "Spatial", assay = "Spatial", slot = "data", do.plot = FALSE)
obj <- GroupCorrelation(object = obj, group.assay = "Spatial", assay = "SCT", slot = "scale.data", do.plot = FALSE)
p1 <- GroupCorrelationPlot(object = obj, assay = "Spatial", cor = "nCount_Spatial_cor") + ggtitle("Log Normalization") +
theme(plot.title = element_text(hjust = 0.5))
p2 <- GroupCorrelationPlot(object = obj, assay = "SCT", cor = "nCount_Spatial_cor") + ggtitle("SCTransform Normalization") +
theme(plot.title = element_text(hjust = 0.5))
png(filename = paste(out_dir, paste0(sample, "_LogNorm_vs_SCTransf.png"), sep = "/"), width = 1200, height = 1000, res = 100)
print(p1 + p2)
dev.off()
# Add info and Prepare for Integration
obj@meta.data$orig.ident <- sample
obj@meta.data$TreatGroups <- treatmentGroups[i]
DefaultAssay(obj) <- "SCT"
obj <- NormalizeData(obj, verbose = FALSE)
obj <- FindVariableFeatures(obj, selection.method = "vst", nfeatures = 500, verbose = FALSE)
reference.list <- c(reference.list, obj)
}
# Find Anchors and Integrate
crc.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30)
crc.integrated <- IntegrateData(anchorset = crc.anchors, dims = 1:30)
# Analyze Integrated Object
DefaultAssay(crc.integrated) <- "integrated"
plot1 <- FeatureScatter(crc.integrated, feature1 = 'nCount_SCT', feature2 = "nCount_Spatial", group.by = "orig.ident")
plot2 <- FeatureScatter(crc.integrated, feature1 = "nFeature_SCT", feature2 = "nCount_Spatial", group.by = "orig.ident")
plot3 <- FeatureScatter(crc.integrated, feature1 = "percent_mito", feature2 = "nFeature_Spatial", group.by = "orig.ident")
CombinePlots(plots = list(plot1, plot2,plot3))
# Scale data
crc.integrated <- ScaleData(crc.integrated, verbose = FALSE)
crc.integrated <- RunPCA(crc.integrated, npcs = 70, verbose = FALSE)
ElbowPlot(crc.integrated, ndims = 70)
DefaultAssay(crc.integrated) <- "integrated"
# UMAP and Clustering
crc.integrated <- RunUMAP(crc.integrated, dims = 1:25, seed.use = 123)
crc.integrated <- FindNeighbors(crc.integrated, reduction = "umap", dims = 1:2, force.recalc = T)
crc.integrated <- FindClusters(crc.integrated, resolution = 0.1)
Idents(crc.integrated) <- "integrated_snn_res.0.1"
crc.integrated <- SCTransform(crc.integrated, assay = "Spatial", new.assay.name = "SCTintegrated")
# Merge Cluster that are redundant or poorly represented
crc.integrated@meta.data$integrated_FinalClusters <- crc.integrated@meta.data$integrated_snn_res.0.1
crc.integrated@meta.data$integrated_FinalClusters[crc.integrated@meta.data$integrated_FinalClusters == 8] <- 2
# Tmp save
saveRDS(crc.integrated, file = paste(out_dir, "crc.integrated.rds", sep = "/"))
# Loop on Samples
for (sample in samples){
print(sample)
# SUBSET DATA AND IDENTIFY CLUSTER TO USE ("integrated_FinalClusters")
crc.integrated <- SetIdent(object = crc.integrated, value = "orig.ident")
sub1 <- subset(crc.integrated, idents = sample)
sub1 <- SetIdent(object = sub1, value = "integrated_FinalClusters")
# EXTRACT COORDINATES
Coord <- crc.integrated@images[[paste0(sample, "img")]]@coordinates
ObjClusterofInterest <- crc.integrated@meta.data$integrated_FinalClusters[crc.integrated@meta.data$orig.ident == sample]
# TUMOR CLUSTERS
Coord$ift <- (ObjClusterofInterest == 1 | ObjClusterofInterest == 6) + 0
# CREATING MATRIX WITH ALL POSITIONS AND 1 WHEN TUMOR IS THERE
maxrow <- max(Coord$row)
maxcol <- max(Coord$col)
msub1 <- matrix(rep(NA, maxrow * maxcol), ncol = maxcol)
msub1 <- as.data.frame(msub1)
for (i in c(1:length(Coord$ift))) {
msub1[Coord$row[i], Coord$col[i]] <- Coord$ift[i]
}
# CREATING MATRIX WITH ALL POSITIONS AND 1 WHEN LIVER IS THERE
msub1liv <- matrix(rep(NA, maxrow * maxcol), ncol = maxcol)
msub1liv <- as.data.frame(msub1liv)
for (i in c(1:length(Coord$ift))){
msub1liv[Coord$row[i], Coord$col[i]] <- (-Coord$ift[i]+1)
}
# CREATING MATRIX WITH NAMES TO LATER FIT THE SEURAT OBJ
namesTokeep <- matrix(rep(NA, maxrow * maxcol), ncol = maxcol)
namesTokeep <- as.data.frame(namesTokeep)
for (i in c(1:length(Coord$ift))){
namesTokeep[Coord$row[i], Coord$col[i]] <- rownames(Coord)[i]
}
# COMPUTING moving average on tumors
masub1 <- ma.matrix(as.matrix(msub1), av = "mean", delta = 2, edgeNA = FALSE )
masub1 <- ma.matrix(as.matrix(msub1), av = "mean", delta = 3, edgeNA = FALSE ) + (masub1*2)
indexes1 <- msub1 # 1 is tumor 0 is liver na is empty
masub1 <- masub1 * indexes1
# COMPUTING moving average on liver
masub1liv <- ma.matrix(as.matrix(msub1liv), av = "mean", delta = 2, edgeNA = FALSE)
masub1liv <- ma.matrix(as.matrix(msub1liv), av = "mean", delta = 3, edgeNA = FALSE) * 0.1 + (masub1liv * 2)
indexes1 <- msub1
masub1liv <- masub1liv * (-1*(indexes1-1))
# Make final data.frame with scores, and cell IDS
# Scores of tumor
A1 = 2.96
A2 = 2.7
A3 = 2.3
masub1[masub1 > A1] <- (-4)
masub1[masub1 <= A1 & masub1 > A2] <- (-3)
masub1[masub1 <= A2 & masub1 > A3] <- (-2)
masub1[masub1 <= A3 & masub1 > 0] <- (-1)
# Score of liver
A1 = 2.095
A2 = 1.91
A3 = 1.7
masub1liv[masub1liv <= A3 & masub1liv > 0] <- 0
masub1liv[masub1liv <= A2 & masub1liv > A3] <- 1
masub1liv[masub1liv <= A1 & masub1liv > A2] <- 2
masub1liv[masub1liv > A1] <- 3
# Merging df finalScores
finalScores <- masub1 + masub1liv
finalScores[finalScores == (-4)] <- "Zone_A"
finalScores[finalScores == (-3)] <- "Zone_B"
finalScores[finalScores == (-2)] <- "Zone_C"
finalScores[finalScores == (-1)] <- "Zone_D"
finalScores[finalScores == 0] <- "Zone_E"
finalScores[finalScores == 1] <- "Zone_F"
finalScores[finalScores == 2] <- "Zone_G"
finalScores[finalScores == 3] <- "Zone_H"
finalScores[finalScores == "NaN"] <- NA
# Making the table with SEURAT iDS
FinalTable <- data.frame(SeuratID = as.vector(as.matrix(namesTokeep)),
datasub1 = as.vector(as.matrix(finalScores)),
drow = rep(1:nrow(masub1), ncol(masub1)),
dcol = rep(1:ncol(masub1), nrow(masub1))[order(rep(1:ncol(masub1), nrow(masub1)))]
)
FinalTable <- na.omit(FinalTable)
# Merging in our seurat obj WARNING ix = "obj3"
SeuratPos <- names(crc.integrated@active.ident)
FinalTable <- FinalTable[order(FinalTable$SeuratID),]
if(!"Zones" %in% colnames(crc.integrated@meta.data)){
crc.integrated@meta.data$Zones <- rep("Zone_A", length(SeuratPos))
}
crc.integrated@meta.data$Zones[crc.integrated@meta.data$orig.ident == sample] <- FinalTable$datasub1
objs <- SplitObject(crc.integrated, split.by = "orig.ident")
pdf(paste(out_dir, paste0(sample, ".zones.pdf"), sep = "/"), heigh = 8, width = 8)
print(SpatialDimPlot(objs[[sample]], group.by = "Zones", images = paste0(sample, "img"), crop = T))
dev.off()
}
# Export Meta Data
full_md <- crc.integrated@meta.data
gz_out_md <- gzfile(paste(data_dir, "crc.integrated_metadata.csv.gz", sep = "/"), "w")
write.csv(x = full_md, gz_out_md)
close(gz_out_md)
# Save final object
saveRDS(crc.integrated, file = paste(out_dir, "crc.integrated.rds", sep = "/"))
library(Seurat)
library(fgsea)
library(gplots)
########################
### General Settings ###
########################
# Working dir
wdir <- "~/Downloads/TIGET/Squadrito/scSquadrito/squadrito_livertumor2022_spatial"
# Data dir
data_dir <- paste(wdir, "data", sep = "/")
# Results dir
out_dir <- paste(wdir, "results", sep = "/")
dir.create(path = out_dir, showWarnings = F)
# Read Obj
crc.integrated <- readRDS(file = paste(out_dir, "crc.integrated.rds", sep = "/"))
## Create Table Visium_DIFGenes_Groupe & SpatialCompartments
## All groups
crc.integrated@meta.data$Zones_unique <- paste(crc.integrated@meta.data$TreatGroups, crc.integrated@meta.data$Zones, sep = "_")
nClus <- unique(crc.integrated@meta.data$Zones_unique)
DefaultAssay(crc.integrated) <- "SCTintegrated"
crc.integrated <- SetIdent(object = crc.integrated, value = "Zones_unique")
cluster0degAll <- NULL
for (i in nClus) {
print(i)
cluster0genes <- FindMarkers(crc.integrated, ident.1 = i, min.pct = 0, only.pos = FALSE, min.cells.group = 1,
test.use = "wilcox", return.thresh = 1, logfc.threshold = 1e-11)
cluster0genes$GeneID <- rownames(cluster0genes)
cluster0deg <- cluster0genes
cluster0deg$Cluster <- i
cluster0degAll <- rbind(cluster0degAll, cluster0deg)
}
write.table(x = cluster0degAll, file = paste(data_dir, "Zones_unique.integrated.txt", sep = "/"),
quote = FALSE, row.names = TRUE, sep = "\t")
## Figure 4a
## Load gsea signature file
miDB_sig2 <- readRDS(paste(data_dir, "miDB_sig3.rds", sep = "/")) #Ctrl
# Loop on Zones unique
Clusters <- unique(cluster0degAll[,7])
TopResultAllCluster <- NULL
for (i in Clusters) {
cat(paste("Calculating ", i, "...", sep = ""))
CLusterTD <- i
df2 <- cluster0degAll[cluster0degAll[,7] == CLusterTD, 2]
names(df2) <- cluster0degAll[cluster0degAll[,7] == CLusterTD, 6]
df2 <- df2[order(df2)]
test1 <- fgsea(pathways = miDB_sig2, stats = df2, minSize = 7, maxSize = 500)
test1 <- test1[rev(order(NES))]
TopResult <- as.data.frame(test1[,])[, c(1,2,3,4,5,6,7)]
TopResult$Cluster <- i
TopResultAllCluster <- rbind(TopResultAllCluster, TopResult)
cat(paste("done!", "\n", sep = ""))
}
write.table(x = TopResultAllCluster, file = paste(data_dir, "GSEA_Zones_unique_integrated.res01.txt", sep = "/"),
quote = FALSE, row.names = TRUE, sep = "\t")
## Heatmaps ##
## Order NES values in a unique table having colum as clusters
## and rows as pathways, if adj.pval > 0.05 NA is introduced
# Create colum with groups
TopResultAllCluster$Group <- stringr::str_split_fixed(TopResultAllCluster$Cluster, "_", 2)[,1]
# Create colum with Zones
TopResultAllCluster$Zones <- stringr::str_split_fixed(TopResultAllCluster$Cluster, "_", 2)[,2]
#Preparing Heatmap parameters
orderC <- unique(TopResultAllCluster$Zones)[order(unique(TopResultAllCluster$Zones))]
SideColors <- rep(c(rep("darkred",3), rep("red",1), rep("darkgreen",3), rep("green",1)), 3)
MLScol <- colorRampPalette(c("darkblue","blue", "grey", "orange","red"), space = "rgb")
# Funtion - assigns the first colum as name
newName<-function(dfCNES,i) {
rownames(dfCNES) <- dfCNES[, i]
dfCNES <- dfCNES[, -(i)]
return(dfCNES)
}
#create tables with enriched terms
for (ix in unique(TopResultAllCluster$Group)){
test <- TopResultAllCluster[TopResultAllCluster$Group == ix,]
test <- test[!is.na(test$padj),]
test <- test[test$padj < 0.05,]
AllCluster <- unique(test$Zones)
orgerGO <- unique(test$pathway)
dfCNES <- data.frame(pathway = orgerGO[order(orgerGO)])
for (i in 1:length(AllCluster)){
dfC1 <- test[test$Zones == AllCluster[i],]
dfC1 <- data.frame(dfC1[order(dfC1$pathway), c(1, 6)])
colnames(dfC1) <- c("pathway", AllCluster[i])
dfCNES <- merge(dfCNES, dfC1, by = 1, all = T)
}
dfCNES <- newName(dfCNES, 1)
dfCNES <- dfCNES[rowSums(is.na(dfCNES)+0) < 8,]
dfCNES[is.na(dfCNES)] <- 0
finaldf2 <- as.matrix(dfCNES)
finaldf2 <- finaldf2[, orderC]
nameF <- paste(ix, "uniquezones.txt", sep = ".")
assign(nameF, finaldf2)
write.table(x = finaldf2, file = paste(out_dir, nameF, sep = "/"), quote = FALSE, row.names = T, sep = "\t")
}
# Select to plot manually curated
Selected.Terms <- c("GOBP_RESPONSE_TO_TYPE_I_INTERFERON",
"GOBP_RESPONSE_TO_INTERFERON_GAMMA",
"GOBP_RESPONSE_TO_VIRUS",
"GOBP_POSITIVE_REGULATION_OF_CYTOKINE_PRODUCTION",
"GOBP_T_CELL_ACTIVATION",
"GOBP_ADAPTIVE_IMMUNE_RESPONSE",
"GOBP_REGULATION_OF_IMMUNE_EFFECTOR_PROCESS",
"IL10_RO",
"HALLMARK_ANGIOGENESIS",
"HALLMARK_P53_PATHWAY",
"HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION",
"HALLMARK_ADIPOGENESIS",
"HALLMARK_PEROXISOME",
"GOBP_SHORT_CHAIN_FATTY_ACID_METABOLIC_PROCESS",
"GOBP_LIPID_OXIDATION",
"HALLMARK_BILE_ACID_METABOLISM")
# Loop to create all Heatmaps
dfNames <- paste0(unique(TopResultAllCluster$Group),".uniquezones.txt")
ix3 <- matrix(nrow = length(Selected.Terms), ncol = 0)
row.names(ix3) <- Selected.Terms
for (i in dfNames){
ix <- read.table(paste(out_dir, i, sep = "/"), header = T, sep = "\t")
ix <- ix[rownames(ix) %in% Selected.Terms,]
diffG <- setdiff(Selected.Terms,rownames(ix))
diffM <- matrix(rep(0, 8*length(diffG)), ncol = 8)
rownames(diffM) <- diffG
colnames(diffM) <- colnames(ix)
ix2 <- rbind(ix, diffM)
ix2 <- ix2[Selected.Terms,]
colnames(ix2) <- paste(sub(".uniquezones.txt", "", i), colnames(ix2), sep = ".")
ix3 <- cbind(ix3, ix2)
}
ix3 <- as.matrix(ix3)
pdf(paste(out_dir, "Heatmap.uniquezones.pdf", sep = "/"), heigh = 6, width = 8)
heatmap.2(ix3, margins = c(5, 12), col = MLScol, cexCol = 0.7, cexRow = 0.5, main = i, scale = "none", Rowv = F, Colv = F, ColSideColors = SideColors)
legend("topright", legend = c("Inner tumor", "Border tumor", "Peritumor", "Liver"), col = unique(SideColors), lty = 1, lwd = 5, cex = .7)
dev.off()
## Extended Figure 7c left
DimPlot(crc.integrated, reduction = "umap", group.by = "integrated_FinalClusters", label = T, pt.size = 2)
## Extended Figure 7c right
SpatialDimPlot(crc.integrated, images = "Sample4img", group.by = "integrated_FinalClusters", pt.size.factor = 2.1)
## Extended Figure 7d
DotPlot(crc.integrated, features = c("Epcam", "Cdh1", "Cldn7", "Actb", "S100a6", "Tmsb10", "Timp1",
"Bgn", "Col3a1", "Spp1", "Alb", "Fabp1", "Apob", "Car3"),
group.by = "integrated_FinalClusters", dot.scale = 15)
## Extended Figure 7e
SpatialDimPlot(crc.integrated, images = "Sample3img", group.by = "Zones", pt.size.factor = 3.5)
SpatialDimPlot(crc.integrated, images = "Sample4img", group.by = "Zones", pt.size.factor = 2.1)
SpatialDimPlot(crc.integrated, images = "Sample7img", group.by = "Zones", pt.size.factor = 2.1)
SpatialDimPlot(crc.integrated, images = "Sample10img", group.by = "Zones", pt.size.factor = 2.5)
SpatialDimPlot(crc.integrated, images = "Sample11img", group.by = "Zones", pt.size.factor = 2.1)
SpatialDimPlot(crc.integrated, images = "Sample14img", group.by = "Zones", pt.size.factor = 2.1)
SpatialDimPlot(crc.integrated, images = "Sample19img", group.by = "Zones", pt.size.factor = 2.1)
SpatialDimPlot(crc.integrated, images = "Sample22img", group.by = "Zones", pt.size.factor = 2.1)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment