# Prepare expression sets from the Tusi dataset.
# Steps:
#   1.   Import data
#   2.   Re-format (transpose and remove meta)
#   2.1  Convert alias to symbols
#   2.2  Convert mouse to human symbols
#   3.   Create expression set with PhenoData added with info
#
# NOTE(review): no alias-to-symbol conversion (2.1) and no phenoData (3) are
# visible in this script; the ExpressionSet below is built from assayData
# only. Confirm whether those steps live elsewhere.
#
# Outputs written to the working directory:
#   convmts.rds     - named list with one gene x cell data frame per sample
#   minimalset.rds  - ExpressionSet built from the merged numeric matrix

library(data.table)
library(nichenetr)
library(dplyr)
# Attached up front so a missing package fails before the slow import step.
library(Biobase)

# For inputs see Tusi et al manuscript.
infiles <- list(
  P1     = "GSM2985844_P1.raw_umifm_counts.csv",
  P1CD71 = "GSM2985845_P1-CD71hi.raw_umifm_counts.csv",
  P2     = "GSM2985846_P2.raw_umifm_counts.csv",
  P5     = "GSM2985849_P5.raw_umifm_counts.csv"
)

# First row of the transposed table that is kept; every row above it is
# discarded. Presumably rows 1-9 are per-cell metadata columns of the CSV
# -- TODO confirm against the input files.
first_gene_row <- 10L

# Fail early, and name every missing input, instead of stopping part-way
# through the import.
input_paths <- unlist(infiles)
missing_inputs <- input_paths[!file.exists(input_paths)]
if (length(missing_inputs) > 0) {
  stop(
    "Missing input file(s): ", paste(missing_inputs, collapse = ", "),
    call. = FALSE
  )
}

# Read one count table and return it as a data frame with one row per
# human gene symbol and one column per cell.
#
# path: path to a CSV file that has `Sample`, `Library` and `Barcode_Seq`
#       columns; the remaining layout is assumed to match `first_gene_row`.
read_sample_as_human <- function(path) {
  obj <- read.csv(
    file = path, header = TRUE, stringsAsFactors = FALSE,
    quote = "", as.is = TRUE
  )

  # Cell identifier: <Sample>_<Library>_<barcode with dashes removed>.
  row.names(obj) <- paste(
    obj$Sample, obj$Library, gsub("-", "", obj$Barcode_Seq),
    sep = "_"
  )

  # After transposing, the CSV columns are rows and the cells are columns.
  # Presumably the metadata columns make this a character matrix, which is
  # why the merged table is converted to numeric further down.
  obj_t <- t(obj)
  stopifnot(nrow(obj_t) >= first_gene_row)
  mymat <- obj_t[seq.int(first_gene_row, nrow(obj_t)), , drop = FALSE]

  # Map the row identifiers to human symbols; rows without a mapping are
  # returned as NA and dropped.
  human_symbols <- unname(
    nichenetr::convert_mouse_to_human_symbols(rownames(mymat))
  )
  has_human_symbol <- !is.na(human_symbols)
  mymat <- mymat[has_human_symbol, , drop = FALSE]
  rownames(mymat) <- human_symbols[has_human_symbol]

  # NOTE(review): if two input genes map to the same human symbol,
  # as.data.frame() may rewrite the duplicated row names -- verify whether
  # duplicates occur and how they should be resolved.
  as.data.frame(mymat)
}

# Merge two gene x cell data frames on their row names and restore the row
# names on the result. merge() defaults to all = FALSE, so only row names
# present in both inputs are kept.
merge_on_row_names <- function(x, y) {
  merged <- merge.data.frame(x, y, by = 0, sort = FALSE)
  rownames(merged) <- merged$Row.names
  merged$Row.names <- NULL
  merged
}

# 1-2. Import and re-format every sample; names and order follow `infiles`.
convmts <- lapply(infiles, read_sample_as_human)
saveRDS(object = convmts, file = "convmts.rds")

# Combine the samples left to right: P1, P1CD71, P2, P5.
full <- Reduce(merge_on_row_names, convmts[c("P1", "P1CD71", "P2", "P5")])

# Convert the merged table to a numeric matrix. Unlike
# apply(full, 2, as.numeric), this stays a matrix even when only one row
# is left.
full_num <- as.matrix(full)
storage.mode(full_num) <- "double"
rownames(full_num) <- rownames(full)

# 3. Create the expression set.
minimalSet <- ExpressionSet(assayData = full_num)
saveRDS(minimalSet, file = "minimalset.rds")