1_PreProcessing_Data.R 2.69 KB
Newer Older
Giorgia Giacomini's avatar
Giorgia Giacomini committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
###### 1_PreProcessing_Data #####


library(SeuratObject)
library(Seurat)
library(harmony)
library(grid)
library(cowplot)
library(gridExtra)
library(RColorBrewer)
library(patchwork)
library(scales)



########################
### General Settings ###
########################
# Working dir

wdir <- "amodio_infertility_scrnaseq_2024"

# Raw data dir (download from GEO)
raw_dir <- paste(wdir, "scRNAseq_processed_data", sep = "/")
# Results dir
out_dir <- paste(wdir, "results", sep = "/")
dir.create(path = out_dir, showWarnings = F)



31
32
sample_info <- data.frame("samples" = c("FER1", "FER2", "FER3", "FER4", "FER5", "OAT1", "OAT2", "OAT3", "OAT4", "NOA1", "NOA2", "NOA3", "NOA4", "NOA5", "NOA6"),
 "groups" = c("FER", "FER", "FER", "FER", "FER", "OAT", "OAT", "OAT", "OAT", "NOA", "NOA", "NOA", "NOA", "NOA", "NOA"))
Giorgia Giacomini's avatar
Giorgia Giacomini committed
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80



# Analysis params
min_cell<- 20 #  Min number of cells to include a feature 
min.feature <- as.numeric(1200) # Min number of features to include a cell
max.feature <- as.numeric(6000) # Max number of features to include a cell
max.pc.mito <- as.numeric(10) #  Max % of mitochondrial to include a cell
mito.prefix = "MT-"
max_pca <- as.numeric(100) # Maximum number of PCA computed
num_pc= 55 # 55 Number of PCA to use (in tSNE, UMAP, ...)
cc_rds <- "single-cell-rna/regev_lab_cell_cycle_human.rds" 
cc_regression <- "full"
cluster_res= 1.2 # Cluster resolutions
marker_pval= 1e-06
marker_logfc= 0


#############################
### Create Seurat Objects ###
#############################
#starting from GEO

for (sample in sample_info$samples) {
  print(sample)
  group <- as.character(sample_info[sample_info$samples == sample, "groups"])
  mtx_file <- paste(raw_dir, paste0(sample, "_matrix.mtx.gz"), sep = "/")
  if (!file.exists(mtx_file)) {
    stop(paste("Cannot find", sample, "MTX file:", mtx_file))
  }
  bcode_file <- paste(raw_dir, paste0(sample, "_barcodes.tsv.gz"), sep = "/")
  if (!file.exists(bcode_file)) {
    stop(paste("Cannot find", sample, "barcode file:", bcode_file))
  }
  feature_file <- paste(raw_dir, paste0(sample, "_features.tsv.gz"), sep = "/")
  if (!file.exists(feature_file)) {
    stop(paste("Cannot find", sample, "features file:", feature_file))
  }
  ## create input minimal Seurat object
  sample_mtx <- ReadMtx(mtx = mtx_file, cells = bcode_file, features = feature_file)
  colnames(x = sample_mtx) <- paste(sample, colnames(x = sample_mtx), sep = '_') # add id tag to cellnames
  sample_obj <- CreateSeuratObject(counts = sample_mtx, project = sample)
  sample_obj[["RNA_Group"]] <- group
  sample_dir <- paste(out_dir, sample, sep = "/")
  dir.create(path = sample_dir, showWarnings = F)
  saveRDS(object = sample_obj, file = paste(sample_dir, paste0(sample, "_minimal.rds"), sep = "/"))
}