Added scripts and data.

c42e67ff · Stefano Beretta · 3ee73ae5 · c42e67ff · c42e67ff · c42e67ff
Commit c42e67ff authored Dec 22, 2022 by Stefano Beretta
--- a/.gitignore
+++ b/.gitignore
+.DS_Store
+results/
--- a/data/GSEA_Zones_unique_integrated.res01.txt
+++ b/data/GSEA_Zones_unique_integrated.res01.txt
--- a/data/Zones_unique.integrated.txt
+++ b/data/Zones_unique.integrated.txt
--- a/data/crc.integrated_metadata.csv.gz
+++ b/data/crc.integrated_metadata.csv.gz
--- a/data/miDB_sig3.rds
+++ b/data/miDB_sig3.rds
--- a/scripts/0_Compute_Zones.R
+++ b/scripts/0_Compute_Zones.R
+library(Seurat)
+library(ggplot2)
+library(patchwork)
+library(dplyr)
+library(cowplot)
+library(OLIN)
+
+########################
+### General Settings ###
+########################
+# Working dir
+wdir <- "~/Downloads/TIGET/Squadrito/scSquadrito/squadrito_livertumor2022_spatial"
+# Raw data dir (download from GEO)
+geo_dir <- "~/Downloads/TIGET/Squadrito/scSquadrito/GEO_data"
+# Data dir
+data_dir <- paste(wdir, "data", sep = "/")
+# Results dir
+out_dir <- paste(wdir, "results", sep = "/")
+dir.create(path = out_dir, showWarnings = F)
+
+analysis_params <- list(
+    min.feature = 500, # min nFeature_RNA
+    max.pc.mito = 20, # max percent.mt
+    mito.prefix = "^mt-", # mito prefix
+)
+
+# Load Samples
+samples <- c("Sample3", "Sample4", "Sample7", "Sample10", "Sample11", "Sample14", "Sample19", "Sample22")
+treatmentGroups <- c("Ctrl", "nonRes", "Ctrl", "parRes", "parRes", "nonRes", "Ctrl", "parRes")
+reference.list <- c()
+for (i in 1:length(samples)) {
+    sample <- samples[i]
+    print(sample)
+    obj_img <- Read10X_Image(image.dir = paste(geo_dir, paste0(sample, "_spatial"), sep = "/"))
+    obj <- Load10X_Spatial(data.dir = geo_dir, filename = paste0(sample, "_filtered_feature_bc_matrix.h5"), image = obj_img, slice = paste0(sample, "img"))
+    # Quality check and Filter
+    obj <- PercentageFeatureSet(obj, analysis_params$mito.prefix, col.name = "percent_mito")
+    obj <- PercentageFeatureSet(obj, "^Hb.*-", col.name = "percent_hb")
+    qcheck_vplot <- VlnPlot(object = obj, features = c("nCount_Spatial", "nFeature_Spatial", "percent_mito", "percent_hb"), pt.size = 0.1, ncol = 4) + NoLegend()
+    qcheck_splot <- SpatialFeaturePlot(object = obj, features = c("nCount_Spatial", "nFeature_Spatial", "percent_mito", "percent_hb"), ncol = 4)
+    png(filename = paste(out_dir, paste0(sample, "_qCheck_prefilter.png"), sep = "/"), width = 1500, height = 1000, res = 100)
+    print(qcheck_vplot / qcheck_splot)
+    dev.off()
+    obj = obj[, obj$nFeature_Spatial > analysis_params$min.feature & obj$percent_mito < analysis_params$max.pc.mito]
+    qcheck_vplot <- VlnPlot(object = obj, features = c("nCount_Spatial", "nFeature_Spatial", "percent_mito", "percent_hb"), pt.size = 0.1, ncol = 4) + NoLegend()
+    qcheck_splot <- SpatialFeaturePlot(object = obj, features = c("nCount_Spatial", "nFeature_Spatial", "percent_mito", "percent_hb"), ncol = 4)
+    png(filename = paste(out_dir, paste0(sample, "_qCheck_postfilter.png"), sep = "/"), width = 1500, height = 1000, res = 100)
+    print(qcheck_vplot / qcheck_splot)
+    dev.off()
+    # Most Expressed Genes (to be checked)
+    C = obj@assays$Spatial@counts
+    C@x = C@x/rep.int(Matrix::colSums(C), diff(C@p))
+    most_expressed <- order(Matrix::rowSums(C), decreasing = T)[30:1]
+    png(filename = paste(out_dir, paste0(sample, "_top_expressed_genes.png"), sep = "/"), width = 1500, height = 1000, res = 100)
+    par(mar=c(5,8,4,2))
+    boxplot(as.matrix(Matrix::t(C[most_expressed, ])), cex = 0.1, las = 1, xlab = "% total count per cell",
+            col = (scales::hue_pal())(20)[20:1], horizontal = TRUE)
+    dev.off()
+    # rerun normalization to store sctransform residuals for all genes
+    obj <- SCTransform(object = obj, assay = "Spatial", return.only.var.genes = FALSE, verbose = FALSE)
+    # also run standard log normalization for comparison
+    obj <- NormalizeData(obj, verbose = FALSE, assay = "Spatial")
+    # Computes the correlation of the log normalized data and sctransform residuals with the number of UMIs
+    obj <- GroupCorrelation(object = obj, group.assay = "Spatial", assay = "Spatial", slot = "data", do.plot = FALSE)
+    obj <- GroupCorrelation(object = obj, group.assay = "Spatial", assay = "SCT", slot = "scale.data", do.plot = FALSE)
+    
+    p1 <- GroupCorrelationPlot(object = obj, assay = "Spatial", cor = "nCount_Spatial_cor") + ggtitle("Log Normalization") +
+        theme(plot.title = element_text(hjust = 0.5))
+    p2 <- GroupCorrelationPlot(object = obj, assay = "SCT", cor = "nCount_Spatial_cor") + ggtitle("SCTransform Normalization") +
+        theme(plot.title = element_text(hjust = 0.5))
+    png(filename = paste(out_dir, paste0(sample, "_LogNorm_vs_SCTransf.png"), sep = "/"), width = 1200, height = 1000, res = 100)
+    print(p1 + p2)
+    dev.off()
+    # Add info and Prepare for Integration
+    obj@meta.data$orig.ident <- sample
+    obj@meta.data$TreatGroups <- treatmentGroups[i]
+    DefaultAssay(obj) <- "SCT"
+    obj <- NormalizeData(obj, verbose = FALSE)
+    obj <- FindVariableFeatures(obj, selection.method = "vst", nfeatures = 500, verbose = FALSE)
+    
+    reference.list <- c(reference.list, obj)
+}
+
+# Find Anchors and Integrate
+crc.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30)
+crc.integrated <- IntegrateData(anchorset = crc.anchors, dims = 1:30)
+
+# Analyze Integrated Object
+DefaultAssay(crc.integrated) <- "integrated"
+plot1 <- FeatureScatter(crc.integrated, feature1 = 'nCount_SCT', feature2 = "nCount_Spatial", group.by = "orig.ident")
+plot2 <- FeatureScatter(crc.integrated,  feature1 = "nFeature_SCT", feature2 = "nCount_Spatial", group.by = "orig.ident")
+plot3 <- FeatureScatter(crc.integrated,  feature1 = "percent_mito", feature2 = "nFeature_Spatial", group.by = "orig.ident")
+CombinePlots(plots = list(plot1, plot2,plot3))
+# Scale data
+crc.integrated <- ScaleData(crc.integrated, verbose = FALSE)
+crc.integrated <- RunPCA(crc.integrated, npcs = 70, verbose = FALSE)
+ElbowPlot(crc.integrated, ndims = 70)
+DefaultAssay(crc.integrated) <- "integrated"
+# UMAP and Clustering
+crc.integrated <- RunUMAP(crc.integrated, dims = 1:25, seed.use = 123)
+crc.integrated <- FindNeighbors(crc.integrated, reduction = "umap", dims = 1:2, force.recalc = T)
+crc.integrated <- FindClusters(crc.integrated, resolution = 0.1)
+
+Idents(crc.integrated) <- "integrated_snn_res.0.1"
+crc.integrated <- SCTransform(crc.integrated, assay = "Spatial", new.assay.name = "SCTintegrated")
+
+# Merge Cluster that are redundant or poorly represented
+crc.integrated@meta.data$integrated_FinalClusters <- crc.integrated@meta.data$integrated_snn_res.0.1
+crc.integrated@meta.data$integrated_FinalClusters[crc.integrated@meta.data$integrated_FinalClusters == 8] <- 2
+
+# Tmp save
+saveRDS(crc.integrated, file = paste(out_dir, "crc.integrated.rds", sep = "/"))
+
+# Loop on Samples
+for (sample in samples){
+    print(sample)
+    # SUBSET DATA AND IDENTIFY CLUSTER TO USE ("integrated_FinalClusters")
+    crc.integrated <- SetIdent(object = crc.integrated, value = "orig.ident")
+    sub1 <- subset(crc.integrated, idents = sample)
+    sub1 <- SetIdent(object = sub1, value = "integrated_FinalClusters")
+    # EXTRACT COORDINATES
+    Coord <- crc.integrated@images[[paste0(sample, "img")]]@coordinates
+    ObjClusterofInterest <- crc.integrated@meta.data$integrated_FinalClusters[crc.integrated@meta.data$orig.ident == sample]
+    # TUMOR CLUSTERS
+    Coord$ift <- (ObjClusterofInterest == 1 | ObjClusterofInterest == 6) + 0
+    # CREATING MATRIX WITH ALL POSITIONS AND 1 WHEN TUMOR IS THERE
+    maxrow <- max(Coord$row)
+    maxcol <- max(Coord$col)
+    msub1 <- matrix(rep(NA, maxrow * maxcol), ncol = maxcol)
+    msub1 <- as.data.frame(msub1)
+    for (i in c(1:length(Coord$ift))) {
+        msub1[Coord$row[i], Coord$col[i]] <- Coord$ift[i]
+    }
+    # CREATING MATRIX WITH ALL POSITIONS AND 1 WHEN LIVER IS THERE
+    msub1liv <- matrix(rep(NA, maxrow * maxcol), ncol = maxcol)
+    msub1liv <- as.data.frame(msub1liv)
+    for (i in c(1:length(Coord$ift))){
+        msub1liv[Coord$row[i], Coord$col[i]] <- (-Coord$ift[i]+1)
+    }
+    # CREATING MATRIX WITH NAMES TO LATER FIT THE SEURAT OBJ
+    namesTokeep <- matrix(rep(NA, maxrow * maxcol), ncol = maxcol)
+    namesTokeep <- as.data.frame(namesTokeep)
+    for (i in c(1:length(Coord$ift))){
+        namesTokeep[Coord$row[i], Coord$col[i]] <- rownames(Coord)[i]
+    }
+    # COMPUTING moving average on tumors
+    masub1 <- ma.matrix(as.matrix(msub1), av = "mean", delta = 2, edgeNA = FALSE )
+    masub1 <- ma.matrix(as.matrix(msub1), av = "mean", delta = 3, edgeNA = FALSE ) + (masub1*2)
+    indexes1 <- msub1 # 1 is tumor 0 is liver na is empty
+    masub1 <- masub1 * indexes1
+    # COMPUTING moving average on liver
+    masub1liv <- ma.matrix(as.matrix(msub1liv), av = "mean", delta = 2, edgeNA = FALSE)
+    masub1liv <- ma.matrix(as.matrix(msub1liv), av = "mean", delta = 3, edgeNA = FALSE) * 0.1 + (masub1liv * 2)
+    indexes1 <- msub1
+    masub1liv <- masub1liv * (-1*(indexes1-1))
+    # Make final data.frame with scores, and cell IDS
+    # Scores of tumor
+    A1 = 2.96
+    A2 = 2.7
+    A3 = 2.3
+    masub1[masub1 > A1] <- (-4)
+    masub1[masub1 <= A1 & masub1 > A2] <- (-3)
+    masub1[masub1 <= A2 & masub1 > A3] <- (-2)
+    masub1[masub1 <= A3 & masub1 > 0] <- (-1)
+    # Score of liver
+    A1 = 2.095
+    A2 = 1.91
+    A3 = 1.7
+    masub1liv[masub1liv <= A3 & masub1liv > 0] <- 0
+    masub1liv[masub1liv <= A2 & masub1liv > A3] <- 1
+    masub1liv[masub1liv <= A1 & masub1liv > A2] <- 2
+    masub1liv[masub1liv > A1] <- 3
+    # Merging df finalScores
+    finalScores <- masub1 + masub1liv
+    finalScores[finalScores == (-4)] <- "Zone_A"
+    finalScores[finalScores == (-3)] <- "Zone_B"
+    finalScores[finalScores == (-2)] <- "Zone_C"
+    finalScores[finalScores == (-1)] <- "Zone_D"
+    finalScores[finalScores == 0] <- "Zone_E"
+    finalScores[finalScores == 1] <- "Zone_F"
+    finalScores[finalScores == 2] <- "Zone_G"
+    finalScores[finalScores == 3] <- "Zone_H"
+    finalScores[finalScores == "NaN"] <- NA
+    # Making the table with SEURAT iDS
+    FinalTable <- data.frame(SeuratID = as.vector(as.matrix(namesTokeep)),
+                             datasub1 = as.vector(as.matrix(finalScores)),
+                             drow = rep(1:nrow(masub1), ncol(masub1)),
+                             dcol = rep(1:ncol(masub1), nrow(masub1))[order(rep(1:ncol(masub1), nrow(masub1)))]
+                             )
+    FinalTable <- na.omit(FinalTable)
+    # Merging in our seurat obj WARNING ix = "obj3"
+    SeuratPos <- names(crc.integrated@active.ident)
+    FinalTable <- FinalTable[order(FinalTable$SeuratID),]
+    if(!"Zones" %in% colnames(crc.integrated@meta.data)){
+        crc.integrated@meta.data$Zones <- rep("Zone_A", length(SeuratPos))
+    }
+    crc.integrated@meta.data$Zones[crc.integrated@meta.data$orig.ident == sample] <- FinalTable$datasub1
+    
+    objs <- SplitObject(crc.integrated, split.by = "orig.ident")
+    pdf(paste(out_dir, paste0(sample, ".zones.pdf"), sep = "/"), heigh = 8, width = 8)
+    print(SpatialDimPlot(objs[[sample]], group.by = "Zones", images = paste0(sample, "img"), crop = T))
+    dev.off()
+}
+
+# Export Meta Data
+full_md <- crc.integrated@meta.data
+gz_out_md <-  gzfile(paste(data_dir, "crc.integrated_metadata.csv.gz", sep = "/"), "w")
+write.csv(x = full_md, gz_out_md)
+close(gz_out_md)
+
+# Save final object
+saveRDS(crc.integrated, file = paste(out_dir, "crc.integrated.rds", sep = "/"))
--- a/scripts/1_GSEA_Visualization.R
+++ b/scripts/1_GSEA_Visualization.R
+library(Seurat)
+library(fgsea)
+library(gplots)
+
+########################
+### General Settings ###
+########################
+# Working dir
+wdir <- "~/Downloads/TIGET/Squadrito/scSquadrito/squadrito_livertumor2022_spatial"
+# Data dir
+data_dir <- paste(wdir, "data", sep = "/")
+# Results dir
+out_dir <- paste(wdir, "results", sep = "/")
+dir.create(path = out_dir, showWarnings = F)
+
+# Read Obj
+crc.integrated <- readRDS(file = paste(out_dir, "crc.integrated.rds", sep = "/"))
+
+## Create Table Visium_DIFGenes_Groupe & SpatialCompartments
+## All groups
+crc.integrated@meta.data$Zones_unique <- paste(crc.integrated@meta.data$TreatGroups, crc.integrated@meta.data$Zones, sep = "_")
+nClus <- unique(crc.integrated@meta.data$Zones_unique)
+DefaultAssay(crc.integrated) <- "SCTintegrated"
+crc.integrated <- SetIdent(object = crc.integrated, value = "Zones_unique")
+cluster0degAll <- NULL
+for (i in nClus) {
+    print(i)
+    cluster0genes <- FindMarkers(crc.integrated, ident.1 = i, min.pct = 0, only.pos = FALSE, min.cells.group = 1,
+                                 test.use = "wilcox", return.thresh = 1, logfc.threshold = 1e-11)
+    cluster0genes$GeneID <- rownames(cluster0genes)
+    cluster0deg <- cluster0genes
+    cluster0deg$Cluster <- i
+    cluster0degAll <- rbind(cluster0degAll, cluster0deg)
+}
+write.table(x = cluster0degAll, file = paste(data_dir, "Zones_unique.integrated.txt", sep = "/"),
+            quote = FALSE, row.names = TRUE, sep = "\t")
+
+## Figure 4a
+## Load gsea signature file
+miDB_sig2 <- readRDS(paste(data_dir, "miDB_sig3.rds", sep = "/")) #Ctrl
+
+# Loop on Zones unique
+Clusters <- unique(cluster0degAll[,7])
+TopResultAllCluster <- NULL
+for (i in Clusters) {
+    cat(paste("Calculating ", i, "...", sep = ""))
+    CLusterTD <- i
+    df2 <- cluster0degAll[cluster0degAll[,7] == CLusterTD, 2]
+    names(df2) <- cluster0degAll[cluster0degAll[,7] == CLusterTD, 6]
+    df2 <- df2[order(df2)]
+    test1 <- fgsea(pathways = miDB_sig2, stats = df2, minSize = 7, maxSize = 500)
+    test1 <- test1[rev(order(NES))]
+    TopResult <- as.data.frame(test1[,])[, c(1,2,3,4,5,6,7)]
+    TopResult$Cluster <- i
+    TopResultAllCluster <- rbind(TopResultAllCluster, TopResult)
+    cat(paste("done!", "\n", sep = ""))
+}
+write.table(x = TopResultAllCluster, file = paste(data_dir, "GSEA_Zones_unique_integrated.res01.txt", sep = "/"),
+            quote = FALSE, row.names = TRUE, sep = "\t")
+## Heatmaps ##
+## Order NES values in a unique table having colum as clusters
+## and rows as pathways, if adj.pval > 0.05 NA is introduced 
+
+# Create colum with groups
+TopResultAllCluster$Group <- stringr::str_split_fixed(TopResultAllCluster$Cluster, "_", 2)[,1]
+# Create colum with Zones
+TopResultAllCluster$Zones <- stringr::str_split_fixed(TopResultAllCluster$Cluster, "_", 2)[,2]
+
+#Preparing Heatmap parameters
+orderC <- unique(TopResultAllCluster$Zones)[order(unique(TopResultAllCluster$Zones))]
+SideColors <- rep(c(rep("darkred",3), rep("red",1), rep("darkgreen",3), rep("green",1)), 3)
+MLScol <- colorRampPalette(c("darkblue","blue", "grey", "orange","red"), space = "rgb")
+
+# Funtion - assigns the first colum as name
+newName<-function(dfCNES,i) {
+    rownames(dfCNES) <- dfCNES[, i]
+    dfCNES <- dfCNES[, -(i)]
+    return(dfCNES)
+}
+
+#create tables with enriched terms
+for (ix in unique(TopResultAllCluster$Group)){
+    test <- TopResultAllCluster[TopResultAllCluster$Group == ix,]
+    test <- test[!is.na(test$padj),]
+    test <- test[test$padj < 0.05,]
+    AllCluster <- unique(test$Zones)
+    orgerGO <- unique(test$pathway)
+    dfCNES <- data.frame(pathway = orgerGO[order(orgerGO)])
+    for (i in 1:length(AllCluster)){
+        dfC1 <- test[test$Zones == AllCluster[i],]
+        dfC1 <- data.frame(dfC1[order(dfC1$pathway), c(1, 6)])
+        colnames(dfC1) <- c("pathway", AllCluster[i])
+        dfCNES <- merge(dfCNES, dfC1, by = 1, all = T)
+    }
+    dfCNES <- newName(dfCNES, 1)
+    dfCNES <- dfCNES[rowSums(is.na(dfCNES)+0) < 8,]
+    dfCNES[is.na(dfCNES)] <- 0
+    finaldf2 <- as.matrix(dfCNES)
+    finaldf2 <- finaldf2[, orderC]
+    nameF <- paste(ix, "uniquezones.txt", sep = ".")
+    assign(nameF, finaldf2)
+    write.table(x = finaldf2, file = paste(out_dir, nameF, sep = "/"), quote = FALSE, row.names = T, sep = "\t")
+}
+
+# Select to plot manually curated
+Selected.Terms <- c("GOBP_RESPONSE_TO_TYPE_I_INTERFERON",
+                    "GOBP_RESPONSE_TO_INTERFERON_GAMMA",
+                    "GOBP_RESPONSE_TO_VIRUS",
+                    "GOBP_POSITIVE_REGULATION_OF_CYTOKINE_PRODUCTION",
+                    "GOBP_T_CELL_ACTIVATION",
+                    "GOBP_ADAPTIVE_IMMUNE_RESPONSE",
+                    "GOBP_REGULATION_OF_IMMUNE_EFFECTOR_PROCESS",
+                    "IL10_RO",
+                    "HALLMARK_ANGIOGENESIS",
+                    "HALLMARK_P53_PATHWAY",
+                    "HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION",
+                    "HALLMARK_ADIPOGENESIS",
+                    "HALLMARK_PEROXISOME",
+                    "GOBP_SHORT_CHAIN_FATTY_ACID_METABOLIC_PROCESS",
+                    "GOBP_LIPID_OXIDATION",
+                    "HALLMARK_BILE_ACID_METABOLISM")
+
+# Loop to create all Heatmaps
+dfNames <- paste0(unique(TopResultAllCluster$Group),".uniquezones.txt")
+ix3 <- matrix(nrow = length(Selected.Terms), ncol = 0)
+row.names(ix3) <- Selected.Terms
+for (i in dfNames){
+    ix <- read.table(paste(out_dir, i, sep = "/"), header = T, sep = "\t")
+    ix <- ix[rownames(ix) %in% Selected.Terms,]
+    diffG <- setdiff(Selected.Terms,rownames(ix))
+    diffM <-  matrix(rep(0, 8*length(diffG)), ncol = 8)
+    rownames(diffM) <- diffG
+    colnames(diffM) <- colnames(ix)
+    ix2 <- rbind(ix, diffM)
+    ix2 <- ix2[Selected.Terms,]
+    colnames(ix2) <- paste(sub(".uniquezones.txt", "", i), colnames(ix2), sep = ".")
+    ix3 <- cbind(ix3, ix2)
+}
+
+ix3 <- as.matrix(ix3)
+pdf(paste(out_dir, "Heatmap.uniquezones.pdf", sep = "/"), heigh = 6, width = 8)
+heatmap.2(ix3, margins = c(5, 12), col = MLScol, cexCol = 0.7, cexRow = 0.5, main = i, scale = "none", Rowv = F, Colv = F, ColSideColors = SideColors)
+legend("topright", legend = c("Inner tumor", "Border tumor", "Peritumor", "Liver"), col = unique(SideColors), lty = 1, lwd = 5, cex = .7)
+dev.off()
+
+## Extended Figure 7c left
+DimPlot(crc.integrated, reduction = "umap", group.by = "integrated_FinalClusters", label = T, pt.size = 2)
+
+## Extended Figure 7c right
+SpatialDimPlot(crc.integrated, images = "Sample4img", group.by = "integrated_FinalClusters", pt.size.factor = 2.1)
+
+## Extended Figure 7d
+DotPlot(crc.integrated, features = c("Epcam", "Cdh1", "Cldn7", "Actb", "S100a6", "Tmsb10", "Timp1", 
+                                     "Bgn", "Col3a1", "Spp1", "Alb", "Fabp1", "Apob", "Car3"),
+        group.by = "integrated_FinalClusters", dot.scale = 15)
+
+## Extended Figure 7e
+SpatialDimPlot(crc.integrated, images = "Sample3img", group.by = "Zones", pt.size.factor = 3.5)
+SpatialDimPlot(crc.integrated, images = "Sample4img", group.by = "Zones", pt.size.factor = 2.1)
+SpatialDimPlot(crc.integrated, images = "Sample7img", group.by = "Zones", pt.size.factor = 2.1)
+SpatialDimPlot(crc.integrated, images = "Sample10img", group.by = "Zones", pt.size.factor = 2.5)
+SpatialDimPlot(crc.integrated, images = "Sample11img", group.by = "Zones", pt.size.factor = 2.1)
+SpatialDimPlot(crc.integrated, images = "Sample14img", group.by = "Zones", pt.size.factor = 2.1)
+SpatialDimPlot(crc.integrated, images = "Sample19img", group.by = "Zones", pt.size.factor = 2.1)
+SpatialDimPlot(crc.integrated, images = "Sample22img", group.by = "Zones", pt.size.factor = 2.1)
+
+