suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(monocle3))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(ComplexHeatmap))
suppressPackageStartupMessages(library(viridis))
suppressPackageStartupMessages(library(outliers))
suppressPackageStartupMessages(library(R.utils))


# ED_Fig5B ----------------------------------------------------------------

## load scRNAseq data from public dataset GSE162950
load("Scarfo_HEC2023/scRNAseq/sample_final.Rdata")
sample <- SetIdent(sample, value = sample@meta.data$seurat_clusters)
DimPlot(sample, label=TRUE, group.by = "seurat_clusters")
sample@meta.data[["orig.ident"]] <- factor(sample@meta.data[["orig.ident"]], 
                                           levels = c("AGM_CS10_Liu",
                                                      "AGM_CS11_Liu",
                                                      "AGM_CS13_Liu",
                                                      "AGM_4wk_658",
                                                      "AGM_5wk_555",
                                                      "AGM_5wk_575",
                                                      "AGM_6wk_563",
                                                      "YolkSac_CS11_Liu"))
DimPlot(sample, 
        label=F,
        repel = T,
        reduction="umap", 
        group.by="orig.ident",
        pt.size=0.1,
        cols = c("AGM_CS10_Liu" = "#00BFC4",
                 "AGM_CS11_Liu" = "#00A9FF",
                 "AGM_CS13_Liu" = "#C77CFF",
                 "AGM_4wk_658" = "#F8766D",
                 "AGM_5wk_555" = "#CD9600",
                 "AGM_5wk_575" = "#7CAE00",
                 "AGM_6wk_563" = "#00BE67",
                 "YolkSac_CS11_Liu" = "#FF61CC"))


# ED_Fig5C ----------------------------------------------------------------

pal <- viridis(n = 10, option = "D")
FeaturePlot(sample,
            features = c("HOXA9", "HOXA10"),
            ncol = 2,
            repel = T,
            pt.size = 0.05,
            order = T,
            cols = pal)


# ED_Fig5E ----------------------------------------------------------------

devtools::install_github("sturgeonlab/Luff-etal-2021/SingleR/versioncontrolscripts")
library(versioncontrolscripts)

## load reference dataset
cd32_dll4 <- read.table("Scarfo_HEC2023/BulkRNAseq_2/featureCounts_results_tpm.txt", 
                        header=T, sep="\t")

## load Seurat data
load("Scarfo_HEC2023/scRNAseq/sample_final.Rdata")
sample <- SetIdent(sample, value = sample@meta.data$seurat_clusters)
sample_noYS <- subset(sample, orig.ident != "YolkSac_CS11_Liu")
sample_noYS@meta.data[["orig.ident"]] <-
  factor(
    sample_noYS@meta.data[["orig.ident"]],
    levels = c(
      "AGM_CS10_Liu",
      "AGM_CS11_Liu",
      "AGM_CS13_Liu",
      "AGM_4wk_658",
      "AGM_5wk_555",
      "AGM_5wk_575",
      "AGM_6wk_563"
    )
  )

## defining HEC and AEC
aec <- WhichCells(sample_noYS, 
                 expression = CDH5 > 0.2 & 
                   CXCR4 > 0.3 & 
                   GJA5 > 0.2 & 
                   DLL4 > 0.1 & 
                   PTPRC < 0.1 & 
                   SPN < 0.05 & 
                   HEY2 > 0.15)
hec <- WhichCells(sample_noYS, 
                  expression =  RUNX1 > 0.1 & 
                    CDH5 > 0.2 & 
                    HOXA9 > 0.05 & 
                    PTPRC < 0.1 & 
                    ITGA2B < 0.1 & 
                    SPN < 0.05)
Idents(sample_noYS, cells=aec) = "AEC"
Idents(sample_noYS, cells=hec) = "HEC"
sample_noYS$CellType <- Idents(sample_noYS)
sample_noYS <- subset(sample_noYS, CellType=="AEC" | CellType=="HEC")
Idents(sample_noYS) <- "CellType"
## setting up reference dataset
bulk <- cd32_dll4[,-c(2:6)]
bulk <- bulk[,c(1,2,3,4,6,7,9)]
colnames(bulk) <- c("genes",
                    "RS227_CD184","RS227_CD32_plus",
                    "RS242_CD184","RS242_CD32_plus",
                    "RS243_CD184","RS243_CD32_plus")
bulk$CD184 <- rowMeans(bulk[,c(2,4,6)])
bulk$CD32_plus <- rowMeans(bulk[,c(3,5,7)])
bulk <- bulk[,c(1,9,8)]
types = list("CD32_plus","CD184")
main_types = list("CD32_plus","CD184")
x = bulk[,1]
loss = bulk[,-1]
rownames(loss) = make.names(x, unique = TRUE)
bulk <- loss
bulk = as.matrix(bulk)
types = as.character(types)
main_types = as.character(main_types)
myref = list(data = bulk, main_types = main_types, types = types)
myref[["de.genes"]] = CreateVariableGeneSet(bulk,types,200)
myref[["de.genes.main"]] = CreateVariableGeneSet(bulk,main_types,300)
labels <- sample_noYS@meta.data$orig.ident
stage <- sample_noYS@meta.data$CellType
stage2 <- data.frame(stage)
stage2$stage <- as.character(stage2$stage)
stage2 <- stage2$stage
# create singlecell dataset for comparison to bulk RNAseq
data <- GetAssayData(sample_noYS, assay = "RNA", slot = "data")
data <- as.matrix(data)
# run main SingleR function & add names for cluster
obj <- SingleR.CreateObject(data, 
                            myref, 
                            clusters = NULL, 
                            variable.genes = "de", 
                            fine.tune = F)
obj[["names"]] <- stage2
obj[["names2"]] <- labels
data <- t(obj$SingleR.single.main$r)
cell_types = as.data.frame(obj$names)
colnames(cell_types) = 'CellTypes'
rownames(cell_types) = colnames(data)
test <- as.data.frame(obj$names2)
colnames(test) <- "Samples"
rownames(test) <- colnames(data)
clusters <- cbind(cell_types, test)
breaksList = seq(0, 1, by = 0.01)
# export relative correlation scores and names for each cell in cs dataset
scores = obj$SingleR.single.main$r
datascores <- as.matrix(scores)
scores <- data.frame(datascores)
type <- data.frame(obj$names)
cells <- data.frame(obj$names2)
scores$type <- type$obj.names
scores$cells <- cells$obj.names2

#successive sorting orders cells in aesthetically-pleasing manner
for (k in unique(scores$cells)) {
  clu <- filter(scores, cells == k)
  clu <- clu[order(clu$CD184), ]
  clu <- clu[order(clu$CD32_plus), ]
  clu_cells <- rownames(clu)
  assign(paste0("clu",k,"_cells"),clu_cells)
}

## reorder both clusters and HEC and AEC cells
agm_4 <- filter(scores, cells == "AGM_4wk_658")
agm_4_hec <- filter(agm_4, type == "HEC")
agm_4_hec <- agm_4_hec[order(agm_4_hec$CD184),]
agm_4_hec <- agm_4_hec[order(agm_4_hec$CD32_plus),]
agm_4_aec <- filter(agm_4, type == "AEC")
agm_4_aec <- agm_4_aec[order(agm_4_aec$CD184),]
agm_4_aec <- agm_4_aec[order(agm_4_aec$CD32_plus),]

agm_5_1 <- filter(scores, cells == "AGM_5wk_555")
agm_5_1_hec <- filter(agm_5_1, type == "HEC")
agm_5_1_hec <- agm_5_1_hec[order(agm_5_1_hec$CD184),]
agm_5_1_hec <- agm_5_1_hec[order(agm_5_1_hec$CD32_plus),]
agm_5_1_aec <- filter(agm_5_1, type == "AEC")
agm_5_1_aec <- agm_5_1_aec[order(agm_5_1_aec$CD184),]
agm_5_1_aec <- agm_5_1_aec[order(agm_5_1_aec$CD32_plus),]

agm_5_2 <- filter(scores, cells == "AGM_5wk_575")
agm_5_2_hec <- filter(agm_5_2, type == "HEC")
agm_5_2_hec <- agm_5_2_hec[order(agm_5_2_hec$CD184),]
agm_5_2_hec <- agm_5_2_hec[order(agm_5_2_hec$CD32_plus),]
agm_5_2_aec <- filter(agm_5_2, type == "AEC")
agm_5_2_aec <- agm_5_2_aec[order(agm_5_2_aec$CD184),]
agm_5_2_aec <- agm_5_2_aec[order(agm_5_2_aec$CD32_plus),]

agm_6 <- filter(scores, cells == "AGM_6wk_563")
agm_6_hec <- filter(agm_6, type == "HEC")
agm_6_hec <- agm_6_hec[order(agm_6_hec$CD184),]
agm_6_hec <- agm_6_hec[order(agm_6_hec$CD32_plus),]
agm_6_aec <- filter(agm_6, type == "AEC")
agm_6_aec <- agm_6_aec[order(agm_6_aec$CD184),]
agm_6_aec <- agm_6_aec[order(agm_6_aec$CD32_plus),]

agm_cs10 <- filter(scores, cells == "AGM_CS10_Liu")
agm_cs10_hec <- filter(agm_cs10, type == "HEC")
agm_cs10_hec <- agm_cs10_hec[order(agm_cs10_hec$CD184),]
agm_cs10_hec <- agm_cs10_hec[order(agm_cs10_hec$CD32_plus),]
agm_cs10_aec <- filter(agm_cs10, type == "AEC")
agm_cs10_aec <- agm_cs10_aec[order(agm_cs10_aec$CD184),]
agm_cs10_aec <- agm_cs10_aec[order(agm_cs10_aec$CD32_plus),]

agm_cs11 <- filter(scores, cells == "AGM_CS11_Liu")
agm_cs11_hec <- filter(agm_cs11, type == "HEC")
agm_cs11_hec <- agm_cs11_hec[order(agm_cs11_hec$CD184),]
agm_cs11_hec <- agm_cs11_hec[order(agm_cs11_hec$CD32_plus),]
agm_cs11_aec <- filter(agm_cs11, type == "AEC")
agm_cs11_aec <- agm_cs11_aec[order(agm_cs11_aec$CD184),]
agm_cs11_aec <- agm_cs11_aec[order(agm_cs11_aec$CD32_plus),]

agm_cs13 <- filter(scores, cells == "AGM_CS13_Liu")
agm_cs13_hec <- filter(agm_cs13, type == "HEC")
agm_cs13_hec <- agm_cs13_hec[order(agm_cs13_hec$CD184),]
agm_cs13_hec <- agm_cs13_hec[order(agm_cs13_hec$CD32_plus),]
agm_cs13_aec <- filter(agm_cs13, type == "AEC")
agm_cs13_aec <- agm_cs13_aec[order(agm_cs13_aec$CD184),]
agm_cs13_aec <- agm_cs13_aec[order(agm_cs13_aec$CD32_plus),]

agm_4_hec.cells <- row.names(agm_4_hec)
agm_4_aec.cells <- row.names(agm_4_aec)
agm_5_1_hec.cells <- row.names(agm_5_1_hec)
agm_5_1_aec.cells <- row.names(agm_5_1_aec)
agm_5_2_hec.cells <- row.names(agm_5_2_hec)
agm_5_2_aec.cells <- row.names(agm_5_2_aec)
agm_6_hec.cells <- row.names(agm_6_hec)
agm_6_aec.cells <- row.names(agm_6_aec)
agm_cs10_hec.cells <- row.names(agm_cs10_hec)
agm_cs10_aec.cells <- row.names(agm_cs10_aec)
agm_cs11_hec.cells <- row.names(agm_cs11_hec)
agm_cs11_aec.cells <- row.names(agm_cs11_aec)
agm_cs13_hec.cells <- row.names(agm_cs13_hec)
agm_cs13_aec.cells <- row.names(agm_cs13_aec)

order <- c(
  agm_cs10_aec.cells,
  agm_cs11_aec.cells,
  agm_cs13_aec.cells,
  agm_4_aec.cells,
  agm_5_1_aec.cells,
  agm_5_2_aec.cells,
  agm_6_aec.cells,
  agm_cs10_hec.cells,
  agm_cs11_hec.cells,
  agm_cs13_hec.cells,
  agm_4_hec.cells,
  agm_5_1_hec.cells,
  agm_5_2_hec.cells,
  agm_6_hec.cells
)
data_reorder <- data[,match(order, colnames(data))]
annot_colors <- list(CellTypes=c(HEC = "red",
                                 AEC = "royalblue3"),
                     Samples = c(AGM_CS10_Liu = "#00BFC4",
                                 AGM_CS11_Liu = "#00A9FF",
                                 AGM_CS13_Liu = "#C77CFF",
                                 AGM_4wk_658 = "#F8766D",
                                 AGM_5wk_555 = "#CD9600",
                                 AGM_5wk_575 = "#7CAE00",
                                 AGM_6wk_563 = "#00BE67"))
annot <- clusters
colours <- annot_colors
colAnn <- HeatmapAnnotation(df = clusters,
                            which = 'col',
                            show_annotation_name = T,
                            col = annot_colors,
                            annotation_legend_param = list(
                              CellTypes = list(
                                title = "Cell Type",
                                title_gp = gpar(fontsize = 14*96/72,
                                                fontface = "bold"), 
                                labels_gp = gpar(fontsize = 14*96/72),
                                grid_height = unit(0.7, "cm")),
                              Samples = list(
                                title = "Samples",
                                title_gp = gpar(fontsize = 14*96/72,
                                                fontface = "bold"), 
                                labels_gp = gpar(fontsize = 14*96/72),
                                grid_height = unit(0.7, "cm"))),
                            annotation_width = unit(c(1, 4), 'cm'),
                            gap = unit(1, 'mm'))
counts_scaled <- t(scale(t(data_reorder)))
counts_scaled_ordered <- counts_scaled[, match(rownames(clusters),colnames(counts_scaled))]
clusters$DF <- paste(clusters$CellTypes, clusters$Samples, sep = "_")
dd <- cluster_within_group(counts_scaled_ordered, clusters$DF)
hmap <- Heatmap(counts_scaled_ordered,
                name = "Z-score",
                show_column_names = F,
                show_column_dend = F,
                show_parent_dend_line = F,
                show_row_dend = F,
                cluster_rows = F,
                cluster_columns = dd,
                column_gap = unit(c(2,2,5,2,5,2,5,2,
                                    2,5,5,2,5), "mm"),
                top_annotation=colAnn,
                column_split = 14,
                clustering_method_columns = "ward.D2",
                heatmap_legend_param = list(
                  title = "Z-Scores",
                  title_gp = gpar(fontsize = 14*96/72,
                                  fontface = "bold"), 
                  labels_gp = gpar(fontsize = 12*96/72),
                  legend_height = unit(2, "cm")
                ),
                width = unit(15*96/72, "cm"), 
                height = unit(5*96/72, "cm"),
                use_raster = TRUE, # for raster quality, needs to be tested
                raster_quality = 5)
draw(hmap, heatmap_legend_side="right", annotation_legend_side="right")


# ED_Fig5F ----------------------------------------------------------------

## load scRNAseq data from public dataset GSE162950
load("Scarfo_HEC2023/scRNAseq/seurat_object.Rdata")
EHT_scorecard <- c("CXCR4","SOX17","GJA5","MECOM","HOXA9","SPN","CD44","ITGA2B","HLF","GFI1","MLLT3","KCNK17","MYB","STAT5A","SMIM24","RAB27B","SPINK2")
## Select HE cells
sample <- SetIdent(sample, value = sample@meta.data$seurat_clusters)
index1 = GetAssayData(sample, slot="counts")["RUNX1",]>0
index2 = GetAssayData(sample, slot="counts")["CDH5",]>0
index3 = GetAssayData(sample, slot="counts")["PTPRC",]==0
index4 = GetAssayData(sample, slot="counts")["FCGR2B",]==0
bars = colnames(sample)[Idents(sample) %in% c(0,3)]
bars1 = colnames(sample)[index1 & index2 & index3 & index4]
bars2 = intersect(bars, bars1)
## Establish identity for FCGR2B- HE cells
Idents(sample, cells=bars2) = "FCGR2B=0"
index5 = GetAssayData(sample, slot="counts")["FCGR2B",]>0
bars3 = colnames(sample)[index1 & index2 & index3 & index5]
bars4 = intersect(bars, bars3)
## Establish identity for FCGR2B+ HE cells
Idents(sample, cells=bars4) = "FCGR2B_exp"
sample$CellType <- Idents(sample)
sample_sub <- subset(sample, CellType=="FCGR2B_exp" | CellType=="FCGR2B=0")
DotPlot(sample_sub, 
        features=EHT_scorecard, 
        cols=c("grey90","red3"),
        dot.scale = 10) +
  coord_flip()