1_PreProcessing_Data.R 2.7 KB
Newer Older
Giorgia Giacomini's avatar
Giorgia Giacomini committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
###### 1_PreProcessing_Data #####


library(SeuratObject)
library(Seurat)
library(harmony)
library(grid)
library(cowplot)
library(gridExtra)
library(RColorBrewer)
library(patchwork)
library(scales)



########################
### General Settings ###
########################
# Working dir

wdir <- "amodio_infertility_scrnaseq_2024"

# Raw data dir (download from GEO)
raw_dir <- paste(wdir, "scRNAseq_processed_data", sep = "/")
# Results dir
out_dir <- paste(wdir, "results", sep = "/")
dir.create(path = out_dir, showWarnings = F)



sample_info <- data.frame("samples" = c("FER1", "FER2", "FER3", "FER4", "FER5", "OAT1", "OAT2", "OAT3", "OAT4", "iNOA1", "iNOA2", "iNOA3", "iNOA4", "iNOA5", "iNOA6"),
 "groups" = c("FER", "FER", "FER", "FER", "FER", "OAT", "OAT", "OAT", "OAT", "iNOA", "iNOA", "iNOA", "iNOA", "iNOA", "iNOA"))



# Analysis params
min_cell<- 20 #  Min number of cells to include a feature 
min.feature <- as.numeric(1200) # Min number of features to include a cell
max.feature <- as.numeric(6000) # Max number of features to include a cell
max.pc.mito <- as.numeric(10) #  Max % of mitochondrial to include a cell
mito.prefix = "MT-"
max_pca <- as.numeric(100) # Maximum number of PCA computed
num_pc= 55 # 55 Number of PCA to use (in tSNE, UMAP, ...)
cc_rds <- "single-cell-rna/regev_lab_cell_cycle_human.rds" 
cc_regression <- "full"
cluster_res= 1.2 # Cluster resolutions
marker_pval= 1e-06
marker_logfc= 0


#############################
### Create Seurat Objects ###
#############################
#starting from GEO

for (sample in sample_info$samples) {
  print(sample)
  group <- as.character(sample_info[sample_info$samples == sample, "groups"])
  mtx_file <- paste(raw_dir, paste0(sample, "_matrix.mtx.gz"), sep = "/")
  if (!file.exists(mtx_file)) {
    stop(paste("Cannot find", sample, "MTX file:", mtx_file))
  }
  bcode_file <- paste(raw_dir, paste0(sample, "_barcodes.tsv.gz"), sep = "/")
  if (!file.exists(bcode_file)) {
    stop(paste("Cannot find", sample, "barcode file:", bcode_file))
  }
  feature_file <- paste(raw_dir, paste0(sample, "_features.tsv.gz"), sep = "/")
  if (!file.exists(feature_file)) {
    stop(paste("Cannot find", sample, "features file:", feature_file))
  }
  ## create input minimal Seurat object
  sample_mtx <- ReadMtx(mtx = mtx_file, cells = bcode_file, features = feature_file)
  colnames(x = sample_mtx) <- paste(sample, colnames(x = sample_mtx), sep = '_') # add id tag to cellnames
  sample_obj <- CreateSeuratObject(counts = sample_mtx, project = sample)
  sample_obj[["RNA_Group"]] <- group
  sample_dir <- paste(out_dir, sample, sep = "/")
  dir.create(path = sample_dir, showWarnings = F)
  saveRDS(object = sample_obj, file = paste(sample_dir, paste0(sample, "_minimal.rds"), sep = "/"))
}