Chapter 4 Seurat QC Cell-level Filtering
library(Seurat)
library(data.table)
library(tidyverse)
library(magrittr)
library(gridExtra)
4.1 Description
Basic quality control for snRNA-seq: check the distribution of
number of UMIs per cell
- should be above 500
number of genes detected per cell
number of genes detected per UMI
- checks the complexity of each cell. Outlier cells may be cell types with less complex RNA content, such as red blood cells. The value is expected to be higher than 0.8
mitochondrial ratio
- dead or dying cells cause a large amount of mitochondrial contamination
4.2 Load seurat object
# Load the preprocessed Seurat object. load() restores the saved object(s)
# and returns their names; get() then fetches the object by that name so it
# can be bound to `combined` regardless of the name it was saved under.
# NOTE(review): assumes the .RData file holds exactly one object - confirm.
combined <- get(load("data/Demo_CombinedSeurat_SCT_Preprocess.RData"))
4.3 Add other meta info
- percentage of reads mapping to mitochondrial genes
# For macaque, not every gene whose symbol starts with "MT" is a mitochondrial
# gene, so the gene set is listed explicitly rather than matched by prefix.
# NOTE(review): several of these symbols (e.g. MTCL1, MTPAP) look like
# nuclear-encoded genes - confirm this is the intended mitochondrial set.
mt.gene <- c(
  "MTARC2", "MTFR1L", "MTERF1", "MTFR2", "MTRF1L", "MTRES1",
  "MTO1", "MTCH1", "MTFMT", "MTFR1", "MTERF3", "MTERF2", "MTPAP",
  "MTERF4", "MTCH2", "MTIF2", "MTG2", "MTIF3", "MTRF1", "MTCL1"
)
# Store the percentage of counts coming from `mt.gene` as per-cell metadata.
combined[["percent.mt"]] <- PercentageFeatureSet(combined, features = mt.gene)
- number of genes detected per UMI
# Complexity score per cell: log10 of detected genes over log10 of UMIs,
# stored as a new metadata column on the Seurat object.
combined$log10GenesPerUMI <-
  log10(combined$nFeature_RNA) / log10(combined$nCount_RNA)
4.4 Violin plots to check
- get the meta data
# Pull the per-cell metadata out of the Seurat object as a data.table.
df <- as.data.table(combined@meta.data)
# Keep only the sample identifier and the four QC metrics.
sel <- c(
  "orig.ident", "nCount_RNA", "nFeature_RNA", "percent.mt", "log10GenesPerUMI"
)
df <- df[, sel, with = FALSE]
# Preview the first three cells; head() stays safe if fewer rows exist.
head(df, 3)
## orig.ident nCount_RNA nFeature_RNA percent.mt log10GenesPerUMI
## 1: SeuratProject 2740 1705 0.10795250 0.9400695
## 2: SeuratProject 3140 1687 0.09593860 0.9228424
## 3: SeuratProject 2539 1456 0.03738318 0.9290675
- define plotting function
# Shared plot styling: font size for axis text (plot titles use fontsize + 2)
# and the line size used for axis lines and axis ticks.
fontsize <- 10
linesize <- 0.35
# Build one violin plot per QC metric of `df`, with cells grouped on the
# x-axis by `orig.ident`. The result is a list of ggplot objects named after
# the metric columns. Helpers live inside local() so they are created once
# and do not leak into the calling environment.
gp.ls <- local({
  # Fill colour per QC metric. The names are the `df` columns to plot, so
  # columns are picked by name instead of by position.
  col.ls <- c(
    nCount_RNA = "lightpink2",
    nFeature_RNA = "lightblue2",
    percent.mt = "lightgreen",
    log10GenesPerUMI = "coral1"
  )

  # Label helper for stat_summary(): the rounded median, drawn slightly above
  # the median point (raised by a tenth of the maximum value).
  give.n <- function(x) {
    data.frame(y = median(x) + max(x) / 10, label = round(median(x), 2))
  }

  imap(col.ls, function(fill, metric) {
    ggplot(data = df, aes(x = orig.ident, y = .data[[metric]])) +
      geom_violin(trim = FALSE, fill = fill) +
      ggtitle(label = metric) +
      ylab(label = metric) +
      theme_bw() +
      theme(
        # Remove grid lines, strip background and panel border.
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_blank(),
        panel.border = element_blank(),
        # Draw thin black axes with small ticks and no x-axis title.
        # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for
        # line elements; `size` is kept so older versions keep working.
        axis.text = element_text(size = fontsize),
        axis.line = element_line(colour = "black", size = linesize),
        axis.ticks = element_line(size = linesize),
        axis.title.x = element_blank(),
        axis.ticks.length = unit(.05, "cm"),
        plot.title = element_text(size = fontsize + 2, hjust = 0.5),
        legend.position = "none"
      ) +
      # Mark the median with a point and print its value next to it.
      stat_summary(fun = median, geom = "point", col = "black") +
      stat_summary(fun.data = give.n, geom = "text", col = "black")
  })
})
# Lay the four QC violin plots out on a single page, two per row.
grid.arrange(grobs = gp.ls, ncol = 2)