library(MuSiC) library(dplyr) library(nichenetr) library(openxlsx) library(ggplot2) library(patchwork) minimalset <- readRDS("minimalset.rds") # identify genes with double entry obj <- read.csv(file = "GSM2985844_P1.raw_umifm_counts.csv", header = T, stringsAsFactors = F, quote = "", as.is = T) human_symbols = colnames(obj)[10:ncol(obj)] %>% nichenetr::convert_mouse_to_human_symbols() human_ids_doubled <- gsub(pattern = "\\.1$", replacement = "", x = grep(x = rownames(minimalset@assayData$exprs[rowSums(minimalset@assayData$exprs) > 10,]), pattern = "\\.1$", perl = T, value = T)) a <- minimalset@assayData$exprs[rownames(minimalset@assayData$exprs) %in% human_ids_doubled,] a <- a[order(rownames(a)), ] b <- minimalset@assayData$exprs[rownames(minimalset@assayData$exprs) %in% paste0(human_ids_doubled,".1"),] b <- b[order(rownames(b)), ] u <- a + b # Re-Creating expression matrix (filtered with rowsum 10 + fixing human genes coming from 2 different mouse genes) ori.mt <- minimalset@assayData$exprs ori.mt.filt <- ori.mt[rowSums(ori.mt) > 10,] ori.mt.distinct <- ori.mt.filt[!rownames(ori.mt.filt) %in% c(human_ids_doubled, paste0(human_ids_doubled,".1")),] # checks table(rownames(u) %in% rownames(ori.mt.distinct)) dim(u) dim(ori.mt.distinct) rm(a); rm(b) # final matrix mt <- rbind(ori.mt.distinct, u) mt <- mt[order(rownames(mt)), ] # recreating minimal set minimalset <- ExpressionSet(assayData=mt) rm(mt) rm(ori.mt) rm(ori.mt.distinct) rm(ori.mt.filt) rm(u) rm(obj) rownames(minimalset@phenoData@data) <- colnames(minimalset@assayData$exprs) minimalset@phenoData@data$SampleID <- rownames(minimalset@phenoData@data) minimalset@phenoData@data$Cluster <- c(rep("cl1",9640), rep("cl2",2537), rep("cl3",3562), rep("cl4",3695)) minimalset@phenoData@data$CellType <- c(rep("CFUe",9640), rep("CFUe_CD71",2537), rep("BFUe",3562), rep("EryBiased",3695)) # Loading rnaseq rnaseqSet <- readRDS("rnaseqSet.rds") # Running music default Est.prop.Tusi <- music_prop(bulk.eset = rnaseqSet, sc.eset = minimalset, clusters = 'Cluster', samples = 'SampleID', verbose = T) saveRDS(Est.prop.Tusi,file = "Est.prop.Tusi.rds") write.xlsx(x = Est.prop.Tusi, "Estimate_proportions_full_rnaseq_dataset.xlsx", rowNames = T) saveRDS(object = minimalset, file = "Expression_set_singlecell_rsum_10.rds") # Version filtered (rowsum 25 - Tusi) exprs_count <- rnaseqSet@assayData$exprs[rowSums(rnaseqSet@assayData$exprs) > 25,] pheno <- readRDS(file = "phenodata.rds") rnaseqSet <- ExpressionSet(assayData = exprs_count, phenoData = pheno) Est.prop.Tusi_rowsum25 <- music_prop(bulk.eset = rnaseqSet, sc.eset = minimalset, clusters = 'Cluster', samples = 'SampleID', verbose = T) write.xlsx(x = Est.prop.Tusi_rowsum25, "Estimate_proportions_rowsum25_rnaseq_dataset.xlsx", rowNames = T) saveRDS(Est.prop.Tusi_rowsum25,file = "Est.prop.Tusi_rowsum25.rds") ## considering values from Est.prop.Tusi_rowsum25 ## # p subset of values only relative to hsc and mpp p$disease <- factor(p$disease, levels = c("HD", "Bthal")) hscmpp_cl1 <- ggplot(p, aes(x= celltype, y = cl1)) + geom_boxplot(aes(fill = disease)) + geom_point(position = position_dodge(width = 0.7), aes(group = disease)) + theme(legend.title = element_text(size=12)) + theme(legend.text = element_text( size=12)) + #facet_grid(.~ variable) + labs(title = "Estimation of P1 population", x="Cell type", y = "Est. prop.") + scale_fill_manual(values = c("gray64", "red2")) hscmpp_cl2 <- ggplot(p, aes(x= celltype, y = cl2)) + geom_boxplot(aes(fill = disease)) + geom_point(position = position_dodge(width = 0.7), aes(group = disease)) + theme(legend.title = element_text(size=12)) + theme(legend.text = element_text( size=12)) + #facet_grid(.~ variable) + labs(title = "Estimation of P1 CD71high population", x="Cell type", y = "Est. prop.") + scale_fill_manual(values = c("gray64", "red2")) png(filename = "Estimation_P1_P1cd71_hscmpp.png", width = 14, height = 7, units = "in", res = 200) hscmpp_cl2 + hscmpp_cl1 + plot_annotation(tag_levels = "A") dev.off()