suppressPackageStartupMessages(library(pheatmap))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(openxlsx))
suppressPackageStartupMessages(library(RColorBrewer))


wd <- "/beegfs/scratch/ric.squadrito/ric.squadrito/202312011215_VMSC10702-NotaroTAD_A019/Analysis_MM/python_scripts/circles"
setwd(wd)

# M21 and M40 are TA_Only and M18 and M41 are TA_Combo 
sample <- "M18" # "M18", "M21", "M40", "M41"

# Define the distance threshold. Default d=30um
d <- 30 # 10 or 30

inCell <- "inCell_" # "inCell_" or ""

# _transcripts_distance_matrix_t30_g500.csv _transcripts_distance_matrix_inCell_t10_g500-2.csv _transcripts_distance_matrix_post_t30_g500.csv
td_matrix_norm <- read.csv(paste0(wd, "/distance_table/", sample, "_transcripts_distance_matrix_norm_", inCell, "t", d, "_g35_selected.csv"))
plot_dir <- paste0(wd, "/distance_table/selected/")
dir.create(plot_dir, showWarnings=F, recursive=T)

td_matrix_norm$X <- NULL
genes <- colnames(td_matrix_norm)

H2_genes <- genes[grep(x = genes, pattern = "H2")]
H2_genes_modified <- gsub("\\.", "-", H2_genes) # replace "." with "-"

# Change the column names of the genes that start with H2...
#colnames(td_matrix_norm[H2_genes]) <- H2_genes_modified
genes <- gsub("\\.", "-", genes) # replace "." with "-"

colnames(td_matrix_norm) <- genes
rownames(td_matrix_norm) <- genes

td_matrix_norm$X <- NULL
colnames(td_matrix_norm) <- genes
rownames(td_matrix_norm) <- genes

#Cancer cell signature from top genes
SelectedGenes <- unique(c("S100a6", "Klf4", "Anxa2", "Cdkn2a", "Vim",
                          #antigen presentation
                          "Ciita", "H2-DMb1", "H2-Aa", "H2-Ab1", 
                          #Macrophages
                          "Clec4f", "C1qa", "Marco",
                          #T cell activation
                          "Trac", "Cd3e", "Klf2", "Sell", "Il2rb", "Stat1","Ccr5", "Il12rb1", "Cd40",
                          #IFN
                          "Irf1", "Isg15", "Ifit2", "Cxcl9","Il18r1",
                          #protumoral
                          "Serpine1","Cebpb","Pdgfb","Lrg1","Id2", "Vegfa", "Il1r1", "Cd274", "Tgfb1")) # Cd81

SelectedGenes_ann <- c("Cancer genes", "Cancer genes", "Cancer genes", "Cancer genes", "Cancer genes",
                       #antigen presentation
                       "MHCI", "MHCI", "MHCI", "MHCI", 
                       #Macrophages
                       "Macs", "Macs","Macs",
                       #T cell activation
                       "T cell activation", "T cell activation", "T cell activation", "T cell activation", "T cell activation", "T cell activation","T cell activation", "T cell activation", "T cell activation",
                       #IFN
                       "IFN", "IFN", "IFN", "IFN","IFN",
                       #Protumoral
                       "Protumoral","Protumoral","Protumoral","Protumoral","Protumoral","Protumoral","Protumoral","Protumoral","Protumoral")

gene_annotation <- data.frame(annotation = SelectedGenes_ann)
row.names(gene_annotation) <- SelectedGenes

signature_levels <- c("Cancer genes", "MHCI", "Macs", "T cell activation", "IFN", "Protumoral")
gene_annotation$annotation <- factor(gene_annotation$annotation, levels = signature_levels, ordered = T)

td_matrix_norm_selected <- td_matrix_norm[SelectedGenes, SelectedGenes]

pheatmap(td_matrix_norm_selected, cluster_rows = F, cluster_cols = F, na_col = "grey",  scale = "none",
         filename = paste0(plot_dir, sample, "_transcripts_distance_matrix_norm_", inCell, "d", d, "_selected.pdf"), width = 15, height = 15,
         fontsize=9, annotation_col = gene_annotation, annotation_row = gene_annotation)


# Compare two samples

# M21 and M40 are TA_Only and M18 and M41 are TA_Combo 
sample2 <- "M21" # "M18", "M21", "M40", "M41"

td_matrix_norm_S2 <- read.csv(paste0(wd, "/distance_table/", sample2, "_transcripts_distance_matrix_norm_", inCell, "t", d, "_g35_selected.csv"))

td_matrix_norm_S2$X <- NULL
colnames(td_matrix_norm_S2) <- genes
rownames(td_matrix_norm_S2) <-genes

# Combine the two samples by subtracting the two matrices
td_matrix_norm_S1_S2 <- td_matrix_norm - td_matrix_norm_S2

td_matrix_norm_selected_S1_S2 <- td_matrix_norm_S1_S2[SelectedGenes, SelectedGenes]

# Plot the results
paletteLength <- 50
myColor <- colorRampPalette(c("blue", "white", "red"))(paletteLength)

# Use floor and ceiling to deal with even/odd length pallettelengths
MIN <- min(td_matrix_norm_selected_S1_S2[!is.na(td_matrix_norm_selected_S1_S2)])
MAX <- max(td_matrix_norm_selected_S1_S2[!is.na(td_matrix_norm_selected_S1_S2)])
myBreaks <- c(seq(MIN, 0, length.out=ceiling(paletteLength/2) + 1), 
              seq(MAX/paletteLength, MAX, length.out=floor(paletteLength/2)))


pheatmap(td_matrix_norm_selected_S1_S2, cluster_rows = F, cluster_cols = F, na_col = "grey",  scale = "none",
         filename = paste0(plot_dir, sample, "_", sample2, "_transcripts_distance_matrix_norm_", inCell, "d", d, "_selected.pdf"), width = 15, height = 15,
         fontsize=9, annotation_col = gene_annotation, annotation_row = gene_annotation, color = myColor, breaks = myBreaks)