0_PreProcessData.R

library(SeuratObject)
library(Seurat)
library(harmony)
library(DoubletFinder)
library(grid)
library(cowplot)
library(gridExtra)
library(RColorBrewer)
library(patchwork)
library(scales)

#########################
### Utility Functions ###
#########################
define_region <- function(row, col){
    viewport(layout.pos.row = row, layout.pos.col = col)
}

########################
### General Settings ###
########################
# Working dir
wdir <- "squadrito_livertumor2022_scrnaseq"
# Raw data dir (download from GEO)
raw_dir <- "GEO_data"
# Results dir
out_dir <- paste(wdir, "results", sep = "/")
dir.create(path = out_dir, showWarnings = F)
# Samples
sample_info <- data.frame("samples" = c("Sample3", "Sample4", "Sample7", "Sample10", "Sample11", "Sample14", "Sample19", "Sample22"),
                          "groups" = c("CTRL", "IFN_R", "CTRL", "IFN_PR", "IFN_PR", "IFN_R", "CTRL", "IFN_PR"))
# cell-cycle scoring - NicheNet convertion of Seurat cc.genes.updated.2019 lists
s.genes <- c("Mcm5","Pcna","Tyms","Fen1","Mcm7","Mcm4","Rrm1","Ung","Gins2","Mcm6",
             "Cdca7","Dtl","Prim1","Uhrf1","Cenpu","Hells","Rfc2","Polr1b","Nasp",
             "Rad51ap1","Gmnn","Wdr76","Slbp","Ccne2","Ubr7","Pold3","Msh2","Atad2",
             "Rad51","Rrm2","Cdc45","Cdc6","Exo1","Tipin","Dscc1","Blm","Casp8ap2",
             "Usp1","Clspn","Pola1","Chaf1b","Mrpl36","E2f8")
g2m.genes <- c("Hmgb2","Cdk1","Nusap1","Ube2c","Birc5","Tpx2","Top2a","Ndc80",
               "Cks2","Nuf2","Cks1b","Mki67","Tmpo","Cenpf","Tacc3","Pimreg",
               "Smc4","Ccnb2","Ckap2l","Ckap2","Aurkb","Bub1","Kif11","Anp32e",
               "Tubb4b","Gtse1","Kif20b","Hjurp","Cdca3","Jpt1","Cdc20","Ttk",
               "Cdc25c","Kif2c","Rangap1","Ncapd2","Dlgap5","Cdca2","Cdca8","Ect2",
               "Kif23","Hmmr","Aurka","Psrc1","Anln","Lbr","Ckap5","Cenpe","Ctcf",
               "Nek2","G2e3","Gas2l3","Cbx5","Cenpa")
reg_vars <- c("CC.Difference", "percent.mt", "nCount_RNA")

#############################
### Create Seurat Objects ###
#############################
for (sample in sample_info$samples) {
    print(sample)
    group <- as.character(sample_info[sample_info$samples == sample, "groups"])
    mtx_file <- paste(raw_dir, paste0(sample, "_matrix.mtx.gz"), sep = "/")
    if (!file.exists(mtx_file)) {
        stop(paste("Cannot find", sample, "MTX file:", mtx_file))
    }
    bcode_file <- paste(raw_dir, paste0(sample, "_barcodes.tsv.gz"), sep = "/")
    if (!file.exists(bcode_file)) {
        stop(paste("Cannot find", sample, "barcode file:", bcode_file))
    }
    feature_file <- paste(raw_dir, paste0(sample, "_features.tsv.gz"), sep = "/")
    if (!file.exists(feature_file)) {
        stop(paste("Cannot find", sample, "features file:", feature_file))
    }
    sample_mtx <- ReadMtx(mtx = mtx_file, cells = bcode_file, features = feature_file)
    colnames(x = sample_mtx) <- paste(sample, colnames(x = sample_mtx), sep = '_') # add id tag to cellnames
    sample_obj <- CreateSeuratObject(counts = sample_mtx, project = sample)
    sample_obj[["RNA_Group"]] <- group
    sample_dir <- paste(out_dir, sample, sep = "/")
    dir.create(path = sample_dir, showWarnings = F)
    saveRDS(object = sample_obj, file = paste(sample_dir, paste0(sample, "_final.rds"), sep = "/"))
}

####################################################
### Exploratory analysis for parameter selection ###
####################################################
# Perform DoubletFinder on each samples with different
# combinations of parameters
for (sample in sample_info$samples) {
    print(sample)
    sample_dir <- paste(out_dir, sample, sep = "/")
    plot_dir <- paste(sample_dir, "plots", sep = "/")
    dir.create(path = plot_dir, showWarnings = F)
    obj <- readRDS(paste(sample_dir, paste0(sample, "_final.rds"), sep = "/"))
    obj[["percent.mt"]] <- PercentageFeatureSet(obj, pattern = "^mt-")
    obj <- CellCycleScoring(object = obj, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
    obj@meta.data$CC.Difference <- obj@meta.data$S.Score - obj@meta.data$G2M.Score
    Idents(obj) <- "orig.ident"
    png(paste(plot_dir, paste0(sample, "_prefilter.png"), sep = "/"), width = 1200, height = 1000)
    print(VlnPlot(obj, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0.1))
    dev.off()
    obj@misc$analysis_params$min.feature <- 1000
    obj@misc$analysis_params$max.feature <- 6000
    obj@misc$analysis_params$max.pc.mito <- 10
    obj <- subset(obj, subset = nFeature_RNA > obj@misc$analysis_params$min.feature &
                      nFeature_RNA < obj@misc$analysis_params$max.feature &
                      percent.mt < obj@misc$analysis_params$max.pc.mito)
    png(paste(plot_dir, paste0(sample, "_postfilter.png"), sep = "/"), width = 1200, height = 1000)
    print(VlnPlot(obj, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0.1))
    dev.off()
    obj <- SCTransform(obj, vars.to.regress = c("CC.Difference", "percent.mt", "nCount_RNA"), verbose = FALSE)
    DefaultAssay(object = obj) <- "SCT"
    obj@misc$analysis_params$max_pca <- 50
    obj <- RunPCA(object = obj, npcs = obj@misc$analysis_params$max_pca)
    png(paste(plot_dir, paste0(sample, "_PCA_elbowplot_SCT.png"), sep = "/"), width = 1200, height = 1000)
    print(ElbowPlot(object = obj, ndims = obj@misc$analysis_params$max_pca, reduction = "pca"))
    dev.off()
    obj@misc$analysis_params$num_pc <- 30
    obj <- RunUMAP(object = obj, dims = 1:obj@misc$analysis_params$num_pc)
    ## pK Identification (no ground-truth) --------------------------------------------------
    sweep.res.obj <- paramSweep_v3(obj, PCs = 1:obj@misc$analysis_params$num_pc, sct = TRUE)
    sweep.stats_obj <- summarizeSweep(sweep.res.obj, GT = FALSE)
    bcmvn_obj <- find.pK(sweep.stats_obj)
    dev.off()
    pK <- as.numeric(as.character(bcmvn_obj$pK))
    BCmetric <- bcmvn_obj$BCmetric
    pK_choose <- pK[which(BCmetric %in% max(BCmetric))]
    png(paste(plot_dir, paste0(sample, "_DF_SCT_pK_selection.png"), sep = "/"), width = 1200, height = 1000)
    par(mar = c(5,4,4,8)+1, cex.main = 1.2, font.main = 2)
    plot(x = pK, y = BCmetric, pch = 16, type = "b", col = "blue", lty = 1)
    abline(v = pK_choose, lwd = 2, col = 'red', lty = 2)
    title("The BCmvn distributions")
    text(pK_choose, max(BCmetric), as.character(pK_choose), pos = 4, col = "red")
    dev.off()
    for (pK_select in pK) {
        for (doub_pct in c(0.05, 0.07, 0.09)) {
            nExp <- round(ncol(obj) * doub_pct) #expected % doublet - approx
            obj <- doubletFinder_v3(obj, pN = 0.25, pK = pK_select, nExp = nExp, PCs = 1:obj@misc$analysis_params$num_pc, sct = TRUE)
            df_ident <- grep("DF.classification", colnames(obj@meta.data), value = T)[1]
            Idents(obj) <- df_ident
            new_cn <- paste0("DF_SCT_nExp", doub_pct, "_pK", pK_select)
            colnames(obj@meta.data)[colnames(obj@meta.data) == df_ident] <- new_cn
            str(obj@meta.data)
            dp_df <- DimPlot(object = obj, reduction = "umap", pt.size = 1, group.by = new_cn, split.by = new_cn, ncol= 2) + NoLegend()
            dp_phase <- DimPlot(object = obj, reduction = "umap", pt.size = 1, group.by = "Phase", split.by = new_cn, ncol= 3)
            fp1 <- FeaturePlot(object = obj, reduction = "umap", pt.size = 1, features = c("Cd19"), split.by = new_cn, order = T, ncol= 2)
            fp2 <- FeaturePlot(object = obj, reduction = "umap", pt.size = 1, features = c("Cd3g"), split.by = new_cn, order = T, ncol= 2)
            fp3 <- FeaturePlot(object = obj, reduction = "umap", pt.size = 1, features = c("Itgam"), split.by = new_cn, order = T, ncol = 2)
            fp4 <- FeaturePlot(object = obj, reduction = "umap", pt.size = 1, features = c("Alas2"), split.by = new_cn, order = T, ncol = 2)
            png(paste(plot_dir, paste0("DF_SCT_nExp", doub_pct, "_pK", pK_select, ".png"), sep = "/"), height = 1500, width = 1700)
            plot.new()
            grid.newpage()
            pushViewport(viewport(layout = grid.layout(3, 2)))
            print(dp_phase, vp = define_region(row = 1, col = 1))
            print(dp_df, vp = define_region(row = 1, col = 2))
            print(fp1, vp = define_region(row = 2, col = 1))
            print(fp2, vp = define_region(row = 2, col = 2))
            print(fp3, vp = define_region(row = 3, col = 1))
            print(fp4, vp = define_region(row = 3, col = 2))
            dev.off()
        }
    }
    saveRDS(object = obj, paste(sample_dir, paste0(sample, "_afterDoubletFinder.rds"), sep = "/"))
}

# Prepare some plots on DoubletFinder results
for (sample in sample_info$samples) {
    print(sample)
    sample_dir <- paste(out_dir, sample, sep = "/")
    plot_dir <- paste(sample_dir, "plots", sep = "/")
    obj = readRDS(paste(sample_dir, paste0(sample, "_afterDoubletFinder.rds"), sep = "/"))
    for (pK_select in c(0.005, seq(0.01, 0.3, 0.01))) {
        for (doub_pct in c(0.05, 0.07, 0.09)) {
            nExp <- round(ncol(obj) * doub_pct)
            new_cn <- paste0("DF_SCT_nExp", doub_pct, "_pK", pK_select)
            subtitle <- list("Full" = "", "Cd19" = "", "Cd3g" = "", "Itgam" = "", "Alas2" = "")
            num_title <- 0
            for (md_val in names(table(obj[[new_cn]]))) {
                if (num_title == 0) {
                    num_title <- 1
                    subtitle[["Full"]] <- paste(subtitle[["Full"]], paste(md_val, table(obj[[new_cn]])[[md_val]], sep = " : "), sep = "\n")
                    for (gg in c("Cd19", "Cd3g", "Itgam", "Alas2")) {
                        subtitle[[gg]] <- paste0(gg, ">0 :\t", subtitle[[gg]], paste(md_val, table(subset(obj, subset = !!sym(gg) > 0)[[new_cn]])[[md_val]], sep = "= "))
                    }
                } else {
                    subtitle[["Full"]] <- paste(subtitle[["Full"]], paste(md_val, table(obj[[new_cn]])[[md_val]], sep = " : "), sep = "\t|\t")
                    for (gg in c("Cd19", "Cd3g", "Itgam", "Alas2")) {
                        subtitle[[gg]] <- paste(subtitle[[gg]], paste(md_val, table(subset(obj, subset = !!sym(gg) > 0)[[new_cn]])[[md_val]], sep = "= "), sep = "\t| ")
                    }
                }
                num_title <- num_title + 1
            }
            Idents(obj) <- new_cn
            dp_df <- DimPlot(object = obj, reduction = "umap", pt.size = 1, group.by = new_cn, split.by = new_cn, ncol= 2) + NoLegend() + ggtitle(subtitle[["Full"]])
            dp_phase <- DimPlot(object = obj, reduction = "umap", pt.size = 1, group.by = "Phase", split.by = new_cn, ncol= 3)
            fp1 <- plot_grid(ggdraw() + draw_label(subtitle[["Cd19"]]),
                             FeaturePlot(object = obj, reduction = "umap", pt.size = 1, features = c("Cd19"), split.by = new_cn, order = T, ncol= 2),
                             ncol = 1, rel_heights = c(0.1, 1))
            fp2 <- plot_grid(ggdraw() + draw_label(subtitle[["Cd3g"]]),
                             FeaturePlot(object = obj, reduction = "umap", pt.size = 1, features = c("Cd3g"), split.by = new_cn, order = T, ncol= 2),
                             ncol = 1, rel_heights = c(0.1, 1))
            fp3 <- plot_grid(ggdraw() + draw_label(subtitle[["Itgam"]]),
                             FeaturePlot(object = obj, reduction = "umap", pt.size = 1, features = c("Itgam"), split.by = new_cn, order = T, ncol = 2),
                             ncol = 1, rel_heights = c(0.1, 1))
            fp4 <- plot_grid(ggdraw() + draw_label(subtitle[["Alas2"]]),
                             FeaturePlot(object = obj, reduction = "umap", pt.size = 1, features = c("Alas2"), split.by = new_cn, order = T, ncol = 2),
                             ncol = 1, rel_heights = c(0.1, 1))
            png(paste(plot_dir, paste0("DF_SCT_nExp", doub_pct, "_pK", pK_select, ".png"), sep = "/"), height = 1500, width = 1700)
            plot.new()
            grid.newpage()
            pushViewport(viewport(layout = grid.layout(3, 2)))
            print(dp_phase, vp = define_region(row = 1, col = 1))
            print(dp_df, vp = define_region(row = 1, col = 2))
            print(fp1, vp = define_region(row = 2, col = 1))
            print(fp2, vp = define_region(row = 2, col = 2))
            print(fp3, vp = define_region(row = 3, col = 1))
            print(fp4, vp = define_region(row = 3, col = 2))
            dev.off()
        }
    }
}

# Select the best combinations of parameters of DoubletFinder
# for each sample
samples <- list("Sample3" = c("nExp" = 0.07, "pK" = 0.005),
                "Sample4" = c("nExp" = 0.09, "pK" = 0.005),
                "Sample7" = c("nExp" = 0.07, "pK" = 0.005),
                "Sample10" = c("nExp" = 0.09, "pK" = 0.01),
                "Sample11" = c("nExp" = 0.07, "pK" = 0.005),
                "Sample14" = c("nExp" = 0.09, "pK" = 0.005),
                "Sample19" = c("nExp" = 0.05, "pK" = 0.005),
                "Sample22" = c("nExp" = 0.09, "pK" = 0.02)
)
# Add metadata information on doublets in each sample
full_DoubFinder <- data.frame()
for (sample in names(samples)) {
    print(sample)
    sample_dir <- paste(out_dir, sample, sep = "/")
    obj <- readRDS(paste(sample_dir, paste0(sample, "_afterDoubletFinder.rds"), sep = "/"))
    nExp <- round(ncol(obj) * samples[[sample]][["nExp"]]) #expected % doublet - approx
    obj@misc$analysis_params$doubletFinder <- list(pN = 0.25,
                                                   pK = samples[[sample]][["pK"]],
                                                   nExp = nExp,
                                                   nExpPerc = samples[[sample]][["nExp"]],
                                                   PCs = obj@misc$analysis_params$num_pc)
    new_cn <- paste0("DF_SCT_nExp", obj@misc$analysis_params$doubletFinder$nExpPerc, "_pK", obj@misc$analysis_params$doubletFinder$pK)
    df_sel_classif <- obj@meta.data[, new_cn, drop = F]
    print(head(df_sel_classif))
    colnames(df_sel_classif) <- "DoubFinderClassif"
    full_DoubFinder <- rbind(full_DoubFinder, df_sel_classif)
    saveRDS(object = obj, paste(sample_dir, paste0(sample, "_afterDoubletFinder.rds"), sep = "/"))
}


############################################
### Create Full (Combined) Seurat Object ###
############################################
samples_obj <- list()
for (sample in names(samples)) {
    print(sample)
    sample_dir <- paste(out_dir, sample, sep = "/")
    samples_obj[[sample]] <- readRDS(file = paste(sample_dir, paste0(sample, "_final.rds"), sep = "/"))
}
Full_obj <- merge(x = samples_obj[[1]], y = samples_obj[2:length(samples_obj)])
dir.create(path = paste(out_dir, "Full", sep = "/"))
saveRDS(object = Full_obj, paste(out_dir, "Full", "Full_final.rds", sep = "/"))

plot_dir <- paste(out_dir, "Full", "plots", sep = "/")
dir.create(path = plot_dir, showWarnings = F)
# Assign MT genes
Full_obj[["percent.mt"]] <- PercentageFeatureSet(Full_obj, pattern = "^mt-")
# Identify tresholds visualy and apply tresholds
Idents(Full_obj) <- "RNA_Group"
png(filename = paste(plot_dir, "Full_Prefilter_Vlnplot.png", sep = "/"), width = 1500, height = 1000)
VlnPlot(Full_obj, features = reg_vars, ncol = 3, pt.size = 0)
dev.off()
png(filename = paste(plot_dir, "Full_Prefilter_ScatterMT.png", sep = "/"), width = 1200, height = 800)
FeatureScatter(Full_obj, feature1 = "nCount_RNA", feature2 = "percent.mt")
dev.off()
png(filename = paste(plot_dir, "Full_Prefilter_ScatterFeatures.png", sep = "/"), width = 1200, height = 800)
FeatureScatter(Full_obj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
dev.off()

Full_obj@misc$analysis_params$min.feature <- 1000
Full_obj@misc$analysis_params$max.feature <- 6000
Full_obj@misc$analysis_params$max.pc.mito <- 10
table(Full_obj$orig.ident)
dim(Full_obj)
Full_obj <- subset(Full_obj, subset = nFeature_RNA > Full_obj@misc$analysis_params$min.feature &
                       nFeature_RNA < Full_obj@misc$analysis_params$max.feature &
                       percent.mt < Full_obj@misc$analysis_params$max.pc.mito)
table(Full_obj$orig.ident)
dim(Full_obj)
png(filename = paste(plot_dir, "Full_Postfilter_Vlnplot.png", sep = "/"), width = 1500, height = 1000)
VlnPlot(Full_obj, features = reg_vars, ncol = 3, pt.size = 0)
dev.off()
png(filename = paste(plot_dir, "Full_Postfilter_ScatterMT.png", sep = "/"), width = 1200, height = 800)
FeatureScatter(Full_obj, feature1 = "nCount_RNA", feature2 = "percent.mt")
dev.off()
png(filename = paste(plot_dir, "Full_Postfilter_ScatterFeatures.png", sep = "/"), width = 1200, height = 800)
FeatureScatter(Full_obj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
dev.off()

## Import filtered object and process (SCTransform, runPCA)
# normalize, Scaledata are not computed, since we are not doing diff expression at this point
Full_obj <- SCTransform(Full_obj, vars.to.regress = reg_vars, verbose = TRUE)
DefaultAssay(object = Full_obj) <- "SCT"
Full_obj@misc$analysis_params$max_pca <- 50
Full_obj <- RunPCA(object = Full_obj, npcs = Full_obj@misc$analysis_params$max_pca)
png(filename = paste(plot_dir, "Full_ElbowPlot.png", sep = "/"), width = 1200, height = 800)
ElbowPlot(Full_obj, ndims = Full_obj@misc$analysis_params$max_pca, reduction = "pca")
dev.off()
Full_obj@misc$analysis_params$num_pc <- 40

## Import Doublets
nrow(full_DoubFinder)
dim(Full_obj)

full_DoubFinder_md <- full_DoubFinder$DoubFinderClassif
names(full_DoubFinder_md) <- rownames(full_DoubFinder)
Full_obj <- AddMetaData(object = Full_obj, metadata = full_DoubFinder_md, col.name = "DoubletFinder_classification")
colnames(Full_obj@meta.data)

subtitle <- ""
num_title <- 0
for (md_val in names(table(Full_obj[["DoubletFinder_classification"]]))) {
    if (num_title == 0) {
        num_title <- 1
        subtitle <- paste(subtitle, paste(md_val, table(Full_obj[["DoubletFinder_classification"]])[[md_val]], sep = " : "), sep = "\n")
    } else {
        subtitle <- paste(subtitle, paste(md_val, table(Full_obj[["DoubletFinder_classification"]])[[md_val]], sep = " : "), sep = "\t|\t")
    }
    num_title <- num_title + 1
}
png(paste(plot_dir, "Full_DoubletFinder_classification.png", sep = "/"), height = 1200, width = 1400, res = 100)
DimPlot(object = Full_obj, reduction = "umap", pt.size = 1, group.by = "DoubletFinder_classification") + ggtitle(subtitle) +
    annotation_custom(tableGrob(table(Full_obj$orig.ident, Full_obj$DoubletFinder_classification)), xmin=10, ymin=10)
dev.off()
png(paste(plot_dir, "Full_DoubletFinder_classification_split.png", sep = "/"), height = 1200, width = 1800)
DimPlot(object = Full_obj, reduction = "umap", pt.size = 1, group.by = "DoubletFinder_classification", split.by = "DoubletFinder_classification") + ggtitle(subtitle)
dev.off()

#####################
## Remove Doublets ##
#####################
Idents(Full_obj) <- "DoubletFinder_classification"
table(Full_obj$orig.ident, Full_obj$DoubletFinder_classification)
dim(Full_obj)
Full_obj <- subset(Full_obj, idents = "Singlet")
table(Full_obj$orig.ident)
dim(Full_obj)
## Process (normalize, Scaledata, SCTransform, runPCA)
# Scale data will be used when commputing top marker (we think)
Full_obj <- NormalizeData(object = Full_obj)
Full_obj <- ScaleData(object = Full_obj, vars.to.regress = reg_vars,
                      display.progress = T, features = rownames(Full_obj))
Full_obj <- SCTransform(Full_obj, vars.to.regress = reg_vars, verbose = TRUE)
DefaultAssay(object = Full_obj) <- "SCT"
Full_obj <- RunPCA(object = Full_obj, npcs = Full_obj@misc$analysis_params$max_pca)
ElbowPlot(Full_obj, ndims = Full_obj@misc$analysis_params$max_pca, reduction = "pca")
Full_obj <- RunUMAP(Full_obj, dims = 1:Full_obj@misc$analysis_params$num_pc, seed.use = 1,reduction = "pca")
Full_obj <- FindNeighbors(Full_obj ,reduction = "pca", dims = 1:Full_obj@misc$analysis_params$num_pc,
                          force.recalc = T, n.trees = 5, annoy.metric = "euclidean")

# Perform clustering on SCT
for (res in c(0.4, 0.6, 1.2)) {
    Full_obj <- FindClusters(Full_obj , resolution = res)
}

# We use the SCT_snn_res.1.2. that was already in the Full object as calculated by you.
# Annotate new labels on FULL OBJ after doublet removal
Full_obj@meta.data$CellID <- Full_obj@assays$RNA@counts@Dimnames[[2]]
Full_obj@meta.data$Newlabels <- rep("Undefined", length(Full_obj@meta.data$orig.ident))
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "0"] <- "B cells"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "1"] <- "B cells"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "2"] <- "B cells"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "3"] <- "B cells"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "4"] <- "APCs"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "5"] <- "B cells" 
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "6"] <- "B cells"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "7"] <- "T and NK cells"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "8"] <- "T and NK cells"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "9"] <- "T and NK cells"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "10"] <- "T and NK cells"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "11"] <- "APCs"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "12"] <- "T and NK cells"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "13"] <- "APCs"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "14"] <- "T and NK cells"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "15"] <- "T and NK cells"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "16"] <- "B cells"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "17"] <- "Neutrophils"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "18"] <- "T and NK cells"  
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "19"] <- "APCs"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "20"] <- "T and NK cells"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "21"] <- "APCs"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "22"] <- "T and NK cells" 
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "23"] <- "Neutrophils"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "24"] <- "Erythroblasts" 
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "25"] <- "T and NK cells" 
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "26"] <- "APCs" 
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "27"] <- "LSECs"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "28"] <- "APCs"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "29"] <- "T and NK cells" 
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "30"] <- "T and NK cells" 
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "31"] <- "B1a-like"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "32"] <- "APCs"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "33"] <- "Mast cells"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "34"] <- "Neutrophils"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "35"] <- "Cancer cells"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "36"] <- "APCs"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "37"] <- "Hepatocytes"
Full_obj@meta.data$Newlabels[Full_obj@meta.data$SCT_snn_res.1.2 == "38"] <- "Undefined"


##########################
## Export Table results ##
##########################
data_dir <- paste(wdir, "data", sep = "/")
dir.create(path = data_dir, showWarnings = F)
# Counts
full_counts <- Full_obj@assays$RNA@counts
gz_out_counts <-  gzfile(paste(data_dir, "Full_final_DBR_labeled_counts.csv.gz", sep = "/"), "w")
write.csv(full_counts, gz_out_counts)
close(gz_out_counts)
# Meta Data
full_md <- Full_obj@meta.data
gz_out_md <-  gzfile(paste(data_dir, "Full_final_DBR_labeled_metadata.csv.gz", sep = "/"), "w")
write.csv(x = full_md, gz_out_md)
close(gz_out_md)
# UMAP Coordinates
full_umap <- Full_obj@reductions$umap@cell.embeddings
gz_out_umap <-  gzfile(paste(data_dir, "Full_final_DBR_labeled_umap.csv.gz", sep = "/"), "w")
write.csv(x = full_umap, gz_out_umap)
close(gz_out_umap)

# Save final object
saveRDS(object = Full_obj, paste(out_dir, "Full", "Full_final_DBR_labeled.rds", sep = "/"))