###### 1_PreProcessing_Data ##### library(SeuratObject) library(Seurat) library(harmony) library(grid) library(cowplot) library(gridExtra) library(RColorBrewer) library(patchwork) library(scales) ######################## ### General Settings ### ######################## # Working dir wdir <- "amodio_infertility_scrnaseq_2024" # Raw data dir (download from GEO) raw_dir <- paste(wdir, "scRNAseq_processed_data", sep = "/") # Results dir out_dir <- paste(wdir, "results", sep = "/") dir.create(path = out_dir, showWarnings = F) sample_info <- data.frame("samples" = c("FER1", "FER2", "FER3", "FER4", "FER5", "OAT1", "OAT2", "OAT3", "OAT4", "NOA1", "NOA2", "NOA3", "NOA4", "NOA5", "NOA6"), "groups" = c("FER", "FER", "FER", "FER", "FER", "OAT", "OAT", "OAT", "OAT", "NOA", "NOA", "NOA", "NOA", "NOA", "NOA")) # Analysis params min_cell<- 20 # Min number of cells to include a feature min.feature <- as.numeric(1200) # Min number of features to include a cell max.feature <- as.numeric(6000) # Max number of features to include a cell max.pc.mito <- as.numeric(10) # Max % of mitochondrial to include a cell mito.prefix = "MT-" max_pca <- as.numeric(100) # Maximum number of PCA computed num_pc= 55 # 55 Number of PCA to use (in tSNE, UMAP, ...) cc_rds <- "single-cell-rna/regev_lab_cell_cycle_human.rds" cc_regression <- "full" cluster_res= 1.2 # Cluster resolutions marker_pval= 1e-06 marker_logfc= 0 ############################# ### Create Seurat Objects ### ############################# #starting from GEO for (sample in sample_info$samples) { print(sample) group <- as.character(sample_info[sample_info$samples == sample, "groups"]) mtx_file <- paste(raw_dir, paste0(sample, "_matrix.mtx.gz"), sep = "/") if (!file.exists(mtx_file)) { stop(paste("Cannot find", sample, "MTX file:", mtx_file)) } bcode_file <- paste(raw_dir, paste0(sample, "_barcodes.tsv.gz"), sep = "/") if (!file.exists(bcode_file)) { stop(paste("Cannot find", sample, "barcode file:", bcode_file)) } feature_file <- paste(raw_dir, paste0(sample, "_features.tsv.gz"), sep = "/") if (!file.exists(feature_file)) { stop(paste("Cannot find", sample, "features file:", feature_file)) } ## create input minimal Seurat object sample_mtx <- ReadMtx(mtx = mtx_file, cells = bcode_file, features = feature_file) colnames(x = sample_mtx) <- paste(sample, colnames(x = sample_mtx), sep = '_') # add id tag to cellnames sample_obj <- CreateSeuratObject(counts = sample_mtx, project = sample) sample_obj[["RNA_Group"]] <- group sample_dir <- paste(out_dir, sample, sep = "/") dir.create(path = sample_dir, showWarnings = F) saveRDS(object = sample_obj, file = paste(sample_dir, paste0(sample, "_minimal.rds"), sep = "/")) }