library(MuSiC) library(Biobase) library(magrittr) library(tibble) library(dplyr) library(tidyr) library(knitr) library(xlsx) library(ggplot2) library(openxlsx) library(RColorBrewer) library(clusterProfiler) library(pheatmap) ## what is needed: mydataframe, coldata coldata <- read.xlsx("Dataframe/mydataframe_all.xlsx") coldata <- as.data.frame(coldata) coldata <- subset.data.frame(x = coldata, subset = !coldata$expgroup == "THAL_GMP" ) coldata <- subset.data.frame(x = coldata, subset = !coldata$expgroup == "HD_GMP" ) coldata$sex <- c(rep("Male", 16), rep("Female", 8), rep("Male", 4), rep("Female", 11)) coldata$sex <- as.factor(coldata$sex) coldata$time <- as.factor(coldata$time) coldata$disease <- as.factor(coldata$disease) coldata$name <- as.factor(coldata$name) rownames(coldata) <- coldata$rownames ## CountData countData <- read.table(file = "Full-gene-counts.txt", header = TRUE, stringsAsFactors = FALSE, row.names = 1) countData <- countData[ , c(1:8,11:18,21:43)] colnames(countData) <- coldata$name coldata <- droplevels.data.frame(coldata) coldata$time <- factor(coldata$time, levels = c("HSC","MPP","CMP","MEP", "R5","R6","R7","R8")) ## covert data.frame in matrix for expressionset exprs_count <- as.matrix(countData) minimalSet <- ExpressionSet(assayData = exprs_count) pdata <- coldata rownames(pdata) <- pdata$name ## verify the order of pdata and exprs count ## all(rownames(pdata) == colnames(exprs_count)) ## MUST BE EQUAL metadata <- data.frame(labelDescription = c("rownames used" , "Patient name", "differentiation time", "Thal/HD condition", "expgroup", "patient gender"), row.names = c("rownames","name", "time", "disease", "expgroup", "sex")) phenodata <- new("AnnotatedDataFrame", data = pdata, varMetadata = metadata) rnaseqSet <- ExpressionSet(assayData = exprs_count, phenoData = phenodata) saveRDS(rnaseqSet, "rnaseqSet.rds")