Commit f0c3cffa authored by Monah Abou Alezz's avatar Monah Abou Alezz
Browse files

revisions_update_v1

parent 7dcd8057
suppressPackageStartupMessages(library(DESeq2))
suppressPackageStartupMessages(library(stringr))
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(pheatmap))
suppressPackageStartupMessages(library(RColorBrewer))
## Input featureCounts file
fcount.file <- "Scarfo_HEC2023/BulkRNAseq_2/featureCounts_results_corrected.txt"
## Contrasts to perform
contr <- "DLL4_plus:CD32_plus"
contr.list <- unlist(strsplit(contr, ","))
## Design Formula
formula.str <- "condition"
design.str <- gsub(",", "+", formula.str)
num_cond <- str_count(formula.str, ",") + 1
design.formula <- as.formula(paste("~", design.str, sep = ""))
## Read featureCounts results
if (!file.exists(fcount.file)) {
stop("Error: featureCounts file not found.")
} else {
read.counts <- read.table(fcount.file,
header = TRUE,
check.names = FALSE) %>%
column_to_rownames(var = "Geneid") %>%
select(-c(1:5))
}
orig.names <- names(read.counts)
ds <- list()
cond <- list()
for (i in seq(orig.names[1:length(orig.names)])) {
ds[i] <- unlist(strsplit(orig.names[i], ":"))[2]
cond[i] <- unlist(strsplit(orig.names[i], ":"))[3]
}
ds <- unlist(ds)
cond <- unlist(cond)
## Samples - Conditions
sample_info <- as.data.frame(str_split_fixed(cond, ",", num_cond))
colnames(sample_info) <- str_split_fixed(formula.str, ",", num_cond)
rownames(sample_info) <- ds
colnames(read.counts) <- ds
sample_info$condition <- factor(sample_info$condition)
last_condition <- str_split_fixed(formula.str, ",", num_cond)[num_cond]
## thresholds
adj.pval.thr <- 0.05
logfc.thr <- 0
## DESeq2
DESeq.ds <- DESeqDataSetFromMatrix(countData = read.counts,
colData = sample_info,
design = design.formula)
## Remove genes without any counts
DESeq.ds <- DESeq.ds[rowSums(counts(DESeq.ds)) > 0, ]
## RunDGE
DESeq.ds <- DESeq(DESeq.ds)
## obtain regularized log-transformed values
DESeq.rlog <- rlogTransformation(DESeq.ds, blind = TRUE)
ctrs <- unlist(strsplit(contr.list[1], ":"))
c1 <- ctrs[1]
c2 <- ctrs[2]
DGE.results <- results(DESeq.ds,
pAdjustMethod = "BH",
independentFiltering = TRUE,
contrast = c(last_condition, c1, c2),
alpha = adj.pval.thr)
# Change format
DGE.results.annot <- as.data.frame(DGE.results) %>%
dplyr::select(log2FoldChange, lfcSE, baseMean, pvalue, padj)
colnames(DGE.results.annot) <- c("logFC", "lfcSE", "baseMean", "PValue", "FDR")
## sort the results according to the adjusted p-value
DGE.results.sorted <- DGE.results.annot[order(DGE.results.annot$FDR), ]
## Subset for only significant genes
DGE.results.sig <- subset(DGE.results.sorted,
FDR < adj.pval.thr & abs(logFC) > logfc.thr)
DGEgenes <- rownames(DGE.results.sig)
# ED_Fig7A -----------------------------------------------------------------
gmt.obj <- read.gmt("Scarfo_HEC2023/BulkRNAseq_1/c5.all.v7.2.symbols.gmt")
organism.db <- org.Hs.eg.db
# Remove genes with no FDR
DGE.results.annot <- DGE.results.annot[!is.na(DGE.results.annot$FDR), ]
gene_univ <- row.names(DGE.results.annot)
# get positive and negative gene names
gene.res.top <- subset(DGE.results.annot, FDR < adj.pval.thr & abs(logFC) > logfc.thr)
gene.res.top.pos <- subset(gene.res.top, logFC > 0)
gene.res.top.neg <- subset(gene.res.top, logFC < 0)
contr.enricher.neg <- enricher(row.names(gene.res.top.neg),
universe = gene_univ,
pvalueCutoff = adj.pval.thr,
TERM2GENE = gmt.obj)
contr.enricher.res <- as.data.frame(contr.enricher.neg)
toi <- c("GO_RESPONSE_TO_BMP", "GO_REGULATION_OF_BMP_SIGNALING_PATHWAY")
GO_terms_int <- contr.enricher.res[contr.enricher.res$Description %in% toi, ]
# reformatting the ID column
tmp <- as.data.frame(str_split_fixed(GO_terms_int$ID, "GO_", 2))
tmp2 <- as.data.frame(str_replace_all(tmp$V2, "_", " "))
colnames(tmp2) <- "id"
tmp2$Terms <- str_to_title(tmp2$id)
GO_terms_final <- cbind(GO_terms_int,tmp2)
GO_terms_final <- GO_terms_final[,c(11,9,6)]
colnames(GO_terms_final)[3] <- "Adjusted p-value"
ggplot(data=GO_terms_final,
aes(x=Terms,
y=Count,
fill = `Adjusted p-value`)) +
geom_bar(stat="identity",
width = 0.7) +
scale_x_discrete(limits = GO_terms_final$Terms) +
scale_fill_gradient(low="red",high="blue") +
coord_flip()+
ggtitle("Downregulated in DLL4+ vs CD32+") +
theme_bw() +
theme(aspect.ratio = 0.8)
...@@ -65,7 +65,9 @@ scRNAseq analysis was performed with [Seurat](https://satijalab.org/seurat/). Be ...@@ -65,7 +65,9 @@ scRNAseq analysis was performed with [Seurat](https://satijalab.org/seurat/). Be
- [6.1] Clusters related markers - [6.1] Clusters related markers
- [6.2] Intracluster differential expression analysis according to comparison of interest - [6.2] Intracluster differential expression analysis according to comparison of interest
Pseudotime analysis was performed using [Monocle3](http://cole-trapnell-lab.github.io/monocle3/) Pseudotime analysis was performed using [Monocle3](http://cole-trapnell-lab.github.io/monocle3/) and PAGA-tree from [dynverse](https://dynverse.org/)
In-silico perturbation analysis was performed using [CellOracle](https://github.com/morris-lab/CellOracle)
Input files for scRNAseq analysis are available in the following [link](https://www.dropbox.com/sh/83dxrxqer8cl081/AAC8zALRuYRGh1mEe4lZuaHZa?dl=0) Input files for scRNAseq analysis are available in the following [link](https://www.dropbox.com/sh/83dxrxqer8cl081/AAC8zALRuYRGh1mEe4lZuaHZa?dl=0)
......
suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(reshape2))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(clusterProfiler))
## load Seurat data
sample <- readRDS("Scarfo_HEC2023/scRNAseq/WNTd_HE.rds")
## Seurat markers
markers <- FindAllMarkers(sample,
only.pos = T,
min.pct = 0.1,
logfc.threshold = 0.25)
## define RUNX1 clusters
runx1.clusters <- subset(markers, markers$gene=="RUNX1")
runx1.clusters <- droplevels(runx1.clusters$cluster)
## subset RUNX1 clusters into new object
runx1 <- subset(sample, idents = runx1.clusters)
## differential markers
degs_clu11_vs_clu0_1_2 <- FindMarkers(object = runx1,
ident.1 = 11,
ident.2 = c(0,1,2),
only.pos = FALSE,
min.pct = 0,
test.use = "wilcox",
logfc.threshold = 0,
min.cells.group = 0)
## thresholds
adj.pval.thr <- 0.05
logfc.thr <- 0
# Remove genes with no FDR
markers <- degs_clu11_vs_clu0_1_2[!is.na(degs_clu11_vs_clu0_1_2$p_val_adj), ]
gene_univ <- row.names(markers)
# get positive and negative gene names
gene.res.top <- subset(markers, p_val_adj < adj.pval.thr & abs(avg_logFC) > logfc.thr)
gene.res.top.pos <- subset(gene.res.top, avg_logFC > 0)
gene.res.top.neg <- subset(gene.res.top, avg_logFC < 0)
organism.db <- org.Hs.eg.db
contr.enrich_path <- enrichPathway(gene = gene.res.top.neg,
organism = organism.db,
universe = gene_univ,
pvalueCutoff = adj.pval.thr)
contr.enrich_path.symb <- setReadable(contr.enrich_path, org_db, keyType = "ENTREZID")
toi <- c("RHO GTPases activate IQGAPs", "RHO GTPases Activate ROCKs")
GO_terms_int <- contr.enrich_path.symb[contr.enrich_path.symb$Description %in% toi, ]
colnames(GO_terms_int)[2] <- "Terms"
GO_terms_int$`Adjusted p-value` <- GO_terms_int$qvalue
ggplot(data=GO_terms_int,
aes(x=Terms,
y=Count,
fill = `Adjusted p-value`)) +
geom_bar(stat="identity",
width = 0.7) +
scale_fill_gradient(low="red",high="blue") +
coord_flip()+
xlab("Terms\n") +
ggtitle("Downregulated in Cluster 11 vs 0,1,2") +
theme_bw() +
theme(aspect.ratio = 0.8)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment