Chapter 5 Scrublet Doublet Validation
library(Seurat)
library(tidyverse)
library(magrittr)
library(data.table)
5.1 Description
Check the doublet prediction from scrublet by:
- dimension reduction plot
- nUMI distribution
Judge the component clusters for doublet cells by:
- DEG heatmap
- canonical gene expression
5.2 Load seurat object
# Load the preprocessed Seurat object. load() restores the saved object into
# the workspace and returns its name; get() then fetches it by that name so it
# can be bound to `combined` regardless of the name it was saved under.
combined <- get(load('data/Demo_CombinedSeurat_SCT_Preprocess.RData'))

# Use the "cluster" metadata column as the active cell identities.
Idents(combined) <- "cluster"
5.3 Validate the doublet prediction
# Do the predicted doublets cluster together in the embedding?
# First colour cells by DoubletScores, then by DoubletPrediction.
FeaturePlot(object = combined, features = "DoubletScores", pt.size = 0.01)
DimPlot(
  object = combined,
  group.by = "DoubletPrediction",
  cols = c("red", "azure3"),
  pt.size = 0.01
)

# Compare the nUMI (nCount_RNA) distribution across DoubletPrediction groups;
# pt.size = 0 hides the individual points and the legend is dropped.
VlnPlot(
  object = combined,
  features = "nCount_RNA",
  group.by = "DoubletPrediction",
  pt.size = 0
) +
  NoLegend()
5.4 Calculate fractions of doublet per cluster
# Per-cell metadata as a data.table, restricted to the columns used below.
df <- data.table(combined@meta.data)
sel.meta <- c("DoubletPrediction", "cluster", "Individual")
df <- df[, sel.meta, with = FALSE]

# Build one stacked bar chart per grouping column (columns 2:3 of df, i.e.
# "cluster" and "Individual"). map() returns a named list of ggplot objects.
df[, 2:3] %>% map(~ {
  # Count cells for every (group, DoubletPrediction) pair. `.x` is the current
  # grouping column supplied by map(); the later by = and aes() calls refer to
  # the resulting grouping column under the same name.
  freq1 <- df[, .N, keyby = .(.x, DoubletPrediction)]
  # Total number of cells per group, then the share of each prediction class.
  freq1[, total := sum(N), by = .(.x)]
  freq1[, ratio := N / total]

  linesize <- .35

  ggplot(freq1, aes(fill = DoubletPrediction, y = ratio, x = .x)) +
    geom_bar(position = "stack", stat = "identity") +
    scale_fill_manual(values = c("Doublet" = 'red', "Singlet" = "grey")) +
    xlab('Cluster') +
    scale_y_continuous(
      breaks = seq(0, 1, 0.1),
      expand = c(0, 0),
      name = 'Percentage'
    ) +
    theme_bw() +
    theme(
      panel.grid.major.x = element_blank(),
      panel.grid.major.y = element_blank(),
      panel.grid.minor = element_blank(),
      strip.background = element_blank(),
      panel.border = element_rect(size = linesize),
      axis.ticks = element_blank(),
      axis.text.x = element_text(size = 5)
    )
})
## $cluster
##
## $Individual
5.5 Explore the component clusters for doublets by DEG
- Get the DEGs between the inferred source clusters. Here, for D33, these are InCGE and InMGE.
# Find DEGs between the two candidate source clusters (InMGE vs InCGE),
# keeping genes that pass the min.pct = 0.25 detection filter.
cluster.markers <- FindMarkers(
  combined,
  ident.1 = c("InMGE"),
  ident.2 = "InCGE",
  min.pct = 0.25
)

# Subset to the cells of interest: both source clusters plus D33.
sel.idents <- c("InMGE", "InCGE", "D33")
combined.small <- subset(
  combined,
  cells = WhichCells(combined, idents = sel.idents)
)

# Check the expression of the top DEGs.
# head() is used instead of [1:40] so that fewer than 40 markers does not
# produce NA feature names.
#sel.cells <- WhichCells(combined.small, idents = sel.idents, downsample = 355) # for large dataset
DoHeatmap(
  combined.small,
  features = head(rownames(cluster.markers), 40),
  raster = FALSE
)
5.6 Explore the component clusters for doublets by canonical gene
# Canonical genes used to inspect the component clusters of the doublets.
sel.feature <- c("NXPH1", "PAM", "LHX6", "NR2F2", "ADARB2", "PROX1")

# Expression of each gene on the embedding for all cells, three panels per row.
FeaturePlot(combined, features = sel.feature, pt.size = 0.01, ncol = 3)

# Expression of the same genes as violins, restricted to the selected
# identities in the subset object.
VlnPlot(
  combined.small,
  features = sel.feature,
  pt.size = 0,
  ncol = 3,
  idents = sel.idents
)