1. Introduction

Single-cell RNA sequencing (scRNA-seq) and Spatial RNA sequencing are widely used techniques for profiling gene expression in individual cells with their locations in the histological sections. These allow molecular biology to be studied at a resolution that cannot be matched by bulk sequencing of cell populations. To better visualize the result of reduction, spatial gene expression pattern in single cell or spatial experiment data, ggsc provides some layer functions based on the ggplot2 grammar. It can work with the SingleCellExperiment class or Seurat class, which are the widely used classes for storing data from single cell experiment.

2. Installation

To install ggsc package, please enter the following codes in R:

# Release
if (!requireNamespace('BiocManager', quietly = TRUE))
    install.package("BiocManager")

BiocManager::install("ggsc")

# Or for devel
if(!requireNamespace("remotes", quietly=TRUE)){
    install.packages("remotes")
}
remotes::install_github("YuLab-SMU/ggsc")

3. The data pre-processing

Here we use an example data from a single sample (sample 151673) of human brain dorsolateral prefrontal cortex (DLPFC) in the human brain, measured using the 10x Genomics Visium platform. First, a brief/standard data pre-processing were done with the scater and scran packages.

library(BiocParallel)
library(STexampleData)
library(scater)
library(scran)
library(ggplot2)


# create ExperimentHub instance
eh <- ExperimentHub()

# query STexampleData datasets
myfiles <- query(eh, "STexampleData")
spe <- myfiles[["EH7538"]]


spe <- addPerCellQC(spe, subsets=list(Mito=grep("^MT-", rowData(spe)$gene_name)))
colData(spe) |> head()
## DataFrame with 6 rows and 13 columns
##                            barcode_id     sample_id in_tissue array_row
##                           <character>   <character> <integer> <integer>
## AAACAACGAATAGTTC-1 AAACAACGAATAGTTC-1 sample_151673         0         0
## AAACAAGTATCTCCCA-1 AAACAAGTATCTCCCA-1 sample_151673         1        50
## AAACAATCTACTAGCA-1 AAACAATCTACTAGCA-1 sample_151673         1         3
## AAACACCAATAACTGC-1 AAACACCAATAACTGC-1 sample_151673         1        59
## AAACAGAGCGACTCCT-1 AAACAGAGCGACTCCT-1 sample_151673         1        14
## AAACAGCTTTCAGAAG-1 AAACAGCTTTCAGAAG-1 sample_151673         1        43
##                    array_col ground_truth cell_count       sum  detected
##                    <integer>  <character>  <integer> <numeric> <numeric>
## AAACAACGAATAGTTC-1        16           NA         NA       622       526
## AAACAAGTATCTCCCA-1       102       Layer3          6      8458      3586
## AAACAATCTACTAGCA-1        43       Layer1         16      1667      1150
## AAACACCAATAACTGC-1        19           WM          5      3769      1960
## AAACAGAGCGACTCCT-1        94       Layer3          2      5433      2424
## AAACAGCTTTCAGAAG-1         9       Layer5          4      4278      2264
##                    subsets_Mito_sum subsets_Mito_detected subsets_Mito_percent
##                           <numeric>             <numeric>            <numeric>
## AAACAACGAATAGTTC-1               37                     9              5.94855
## AAACAAGTATCTCCCA-1             1407                    13             16.63514
## AAACAATCTACTAGCA-1              204                    11             12.23755
## AAACACCAATAACTGC-1              430                    13             11.40886
## AAACAGAGCGACTCCT-1             1316                    13             24.22234
## AAACAGCTTTCAGAAG-1              651                    12             15.21739
##                        total
##                    <numeric>
## AAACAACGAATAGTTC-1       622
## AAACAAGTATCTCCCA-1      8458
## AAACAATCTACTAGCA-1      1667
## AAACACCAATAACTGC-1      3769
## AAACAGAGCGACTCCT-1      5433
## AAACAGCTTTCAGAAG-1      4278
colData(spe) |> data.frame() |> 
  ggplot(aes(x = sum, y = detected, colour = as.factor(in_tissue))) +
   geom_point() 

plotColData(spe, x='sum', y = 'subsets_Mito_percent', other_fields="in_tissue") + facet_wrap(~in_tissue)

Firstly, we filter the data to retain the cells that are in the tissue. Then cell-specific biases are normalized using the computeSumFactors method.

spe <- spe[, spe$in_tissue == 1]

clusters <- quickCluster(
              spe, 
              BPPARAM = BiocParallel::MulticoreParam(workers=2), 
              block.BPPARAM = BiocParallel::MulticoreParam(workers=2)
            )

spe <- computeSumFactors(spe, clusters = clusters, BPPARAM = BiocParallel::MulticoreParam(workers=2))
spe <- logNormCounts(spe)

Next, we use the Graph-based clustering method to do the reduction with the runPCA and runTSNE functions provided in the scater package.

# identify genes that drive biological heterogeneity in the data set by 
# modelling the per-gene variance
dec <- modelGeneVar(spe)

# Get the top 15% genes.
top.hvgs <- getTopHVGs(dec, prop=0.15)
spe <- runPCA(spe, subset_row=top.hvgs)

output <- getClusteredPCs(reducedDim(spe), BPPARAM = BiocParallel::MulticoreParam(workers=2))
npcs <- metadata(output)$chosen
npcs
## [1] 13
reducedDim(spe, "PCAsub") <- reducedDim(spe, "PCA")[,1:npcs,drop=FALSE]

g <- buildSNNGraph(spe, use.dimred="PCAsub", BPPARAM = MulticoreParam(workers=2))
cluster <- igraph::cluster_walktrap(g)$membership
colLabels(spe) <- factor(cluster)
set.seed(123)
spe <- runTSNE(spe, dimred="PCAsub", BPPARAM = MulticoreParam(workers=2))

Dimensional reduction plot

Here, we used the sc_dim function provided in the ggsc package to visualize the TSNE reduction result. Unlike other packages, ggsc implemented the ggplot2 graphic of grammar syntax and visual elements are overlaid through the combinations of graphic layers. The sc_dim_geom_label layer is designed to add cell cluster labels to a dimensional reduction plot, and can utilized different implementation of text geoms, such as geom_shadowtext in the shadowtext package and geom_text in the ggplot2 package (default) through the geom argument.

library(ggsc)
library(ggplot2)

sc_dim(spe, reduction = 'TSNE') + sc_dim_geom_label()

sc_dim(spe, reduction = 'TSNE') + 
  sc_dim_geom_label(
    geom = shadowtext::geom_shadowtext,
    color='black', 
    bg.color='white'
  )

Visualize ‘features’ on a dimensional reduction plot

To visualize the gene expression of cells in the result of reduction, ggsc provides sc_feature function to highlight on a dimensional reduction plot.

genes <- c('MOBP', 'PCP4', 'SNAP25', 'HBB', 'IGKC', 'NPY')
target.features <- rownames(spe)[match(genes, rowData(spe)$gene_name)]
sc_feature(spe, target.features[1], slot='logcounts', reduction = 'TSNE')

sc_feature(spe, target.features, slot='logcounts', reduction = 'TSNE')

In addition, it provides sc_dim_geom_feature layer working with sc_dim function to visualize the cells expressed the gene and the cell clusters information simultaneously.

sc_dim(spe, slot='logcounts', reduction = 'TSNE') +
   sc_dim_geom_feature(spe, target.features[1], color='black')

sc_dim(spe, alpha=.3, slot='logcounts', reduction = 'TSNE') + 
    ggnewscale::new_scale_color() + 
    sc_dim_geom_feature(spe, target.features, mapping=aes(color=features)) +
    scale_color_viridis_d()

It also provides sc_dim_geom_ellipse to add confidence levels of the the cluster result, and sc_dim_geom_sub to select and highlight a specific cluster of cells.

sc_dim(spe, reduction = 'TSNE') +
  sc_dim_geom_ellipse(level=0.95)

selected.cluster <- c(1, 6, 8)
sc_dim(spe, reduction = 'TSNE') +
  sc_dim_sub(subset=selected.cluster, .column = 'label')

sc_dim(spe, color='grey', reduction = 'TSNE') + 
  sc_dim_geom_sub(subset=selected.cluster, .column = 'label') + 
    sc_dim_geom_label(geom = shadowtext::geom_shadowtext, 
          mapping = aes(subset = label %in% selected.cluster),
            color='black', bg.color='white')  

Violin plot of gene expression

ggsc provides sc_violin to visualize the expression information of specific genes using the violin layer with common legend, the genes can be compared more intuitively.

sc_violin(spe, target.features[1], slot = 'logcounts')