vulcano.R 2.12 KB
Newer Older
Ivan Merelli's avatar
Ivan Merelli committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
library(dplyr)
library(ggplot2)
library(ggrepel)

args <- commandArgs(trailingOnly = TRUE)
file_path <- args[1]
max_log2FC <- 5
min_log2FC <- -5
min_p_val_adj <-  Il valore minimo di p-value adjusted passato da Python



# Se il minimo p-value adjusted ?? inferiore a 10^-100, fissalo a 10^-100
min_p_val_adj <- 10^-100

raw <- read.table("/DATA/31/molteni/vexas_bm/results/integration/DE_markers/Pz_CTRL/markers_cluster_Late_erythroid_cells.csv", header = TRUE, sep = ",", quote = "\"", dec = ".")
adj.pval.thr <- 0.05

# Imposta un limite inferiore per i p-value adjusted direttamente nei dati
raw$p_val_adj <- pmax(raw$p_val_adj, min_p_val_adj)

data <- data.frame(gene = raw$X, pval = -log10(raw$p_val_adj), lfc = raw$avg_log2FC)
data <- na.omit(data)
data <- mutate(data, color = case_when(
  data$lfc > 0 & data$pval > -log10(adj.pval.thr) ~ "Overexpressed",
  data$lfc < 0 & data$pval > -log10(adj.pval.thr) ~ "Underexpressed",
  TRUE ~ "NonSignificant"
))

# Seleziona i geni di interesse per etichettarli
# HBG1/2, ALAS2, HBB, HBD, FECH, NCOA4, SLC25A37, GLRX5, EIF2AK1.
selected_genes <- c("HBG1" ,"HBG2", "ALAS2", "HBB", "HBD", "FECH", "NCOA4", "SLC25A37", "GLRX5", "EIF2AK1")
highl <-  data %>% filter(gene %in% selected_genes)

highl <- head(subset(data, color != "NonSignificant"), 20)

vol <- ggplot(data, aes(x = lfc, y = pval, colour = color)) +
  geom_point(size = 2, alpha = 0.8, na.rm = TRUE) +
  geom_hline(yintercept = -log10(adj.pval.thr), colour = "darkgray", linetype = "dashed") +
  scale_color_manual(name = "Expression", values = c(Overexpressed = "#CD4F39", Underexpressed = "#008B00", NonSignificant = "darkgray")) +
  geom_label_repel(data = highl, aes(label = gene), box.padding = 0.35, point.padding = 0.5, size = 5, max.overlaps = 10) +
  theme_bw(base_size = 12) +
  theme(legend.position = "right") +
  xlab("log2 Fold Change") +
  ylab(expression(-log[10]("adjusted p-value"))) +
  xlim(min_log2FC, max_log2FC) +
  ylim(0, max(-log10(min_p_val_adj), -log10(raw$p_val_adj)) + 1) # Assicura che il plot includa tutti i valori di p-value

# Salva il plot
ggsave("volcano_late_ery.pdf", plot = vol, width = 11, height = 8, dpi = 300)