library(SingleCellExperiment)
library(TabulaMurisSenisData)
library(ggplot2)

1 Introduction

This package provides access to the processed bulk and single-cell RNA-seq data from the Tabula Muris Senis data set (Schaum et al. 2019; Tabula Muris Consortium 2020). The processed bulk RNA-seq data was downloaded from GEO (accession number GSE132040). The single-cell data (droplet and FACS) was obtained from FigShare (see below for links). All data sets are provided as SingleCellExperiment objects for easy access and use within the Bioconductor ecosystem.

2 Bulk RNA-seq data

The bulk RNA-seq data can be accessed via the TabulaMurisSenisBulk() function. By setting the argument infoOnly to TRUE, we can get information about the size of the data set before downloading it.

# Ask only for size information first; the data set itself is requested by the
# next call.
tmp <- TabulaMurisSenisBulk(infoOnly = TRUE)
#> Total download size: 59.8 MiB
# Retrieve the bulk data set. In this run the messages below show the
# underlying resources being loaded from the cache.
tms_bulk <- TabulaMurisSenisBulk()
#> see ?TabulaMurisSenisData and browseVignettes('TabulaMurisSenisData') for documentation
#> loading from cache
#> see ?TabulaMurisSenisData and browseVignettes('TabulaMurisSenisData') for documentation
#> loading from cache
#> see ?TabulaMurisSenisData and browseVignettes('TabulaMurisSenisData') for documentation
#> loading from cache
# Print the object summary: a SingleCellExperiment with one assay (counts),
# 54352 rows and 947 columns.
tms_bulk
#> class: SingleCellExperiment 
#> dim: 54352 947 
#> metadata(0):
#> assays(1): counts
#> rownames(54352): 0610005C13Rik 0610006L08Rik ... n-TSaga9 n-TStga1
#> rowData names(8): source type ... havana_gene tag
#> colnames(947): A1_384Bulk_Plate1_S1 A1_384Bulk_Plate3_S1 ...
#>   P9_384Bulk_Plate2_S369 P9_384Bulk_Plate3_S369
#> colData names(19): Sample name title ... __alignment_not_unique organ
#> reducedDimNames(0):
#> mainExpName: NULL
#> altExpNames(0):

We list the available tissues.

# Count the bulk samples per organ; `x$name` on the experiment object is the
# shorthand for `colData(x)$name`.
table(tms_bulk$organ)
#> 
#>             BAT            Bone           Brain             GAT           Heart 
#>              54              55              56              56              54 
#>          Kidney     Limb_Muscle           Liver            Lung             MAT 
#>              55              54              55              55              56 
#>          Marrow              NA        Pancreas            SCAT            Skin 
#>              54              14              56              56              51 
#> Small_Intestine          Spleen             WBC 
#>              55              56              55

3 Single-cell RNA-seq data

3.1 Droplet data

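The data files for the droplet single-cell RNA-seq data were downloaded from FigShare (the list of links is not reproduced in this text; see the package vignette via browseVignettes('TabulaMurisSenisData')).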

We list the available tissues.

# List the tissue names available for the droplet data set; the result below
# includes "All" alongside the individual tissues.
listTabulaMurisSenisTissues(dataset = "Droplet")
#>  [1] "All"             "Large_Intestine" "Pancreas"        "Trachea"        
#>  [5] "Skin"            "Fat"             "Thymus"          "Liver"          
#>  [9] "Heart_and_Aorta" "Mammary_Gland"   "Bladder"         "Lung"           
#> [13] "Kidney"          "Limb_Muscle"     "Spleen"          "Tongue"         
#> [17] "Marrow"

As for the bulk data, we can print the size of the data set before downloading it.

# Ask only for size information for the "All" droplet data set before
# requesting the data itself.
tmp <- TabulaMurisSenisDroplet(tissues = "All", infoOnly = TRUE)
#> Total download size (All): 709.0 MiB
# Retrieve the droplet data for "All". In this run the messages below show the
# underlying resources being loaded from the cache.
tms_droplet <- TabulaMurisSenisDroplet(tissues = "All")
#> see ?TabulaMurisSenisData and browseVignettes('TabulaMurisSenisData') for documentation
#> loading from cache
#> require("rhdf5")
#> see ?TabulaMurisSenisData and browseVignettes('TabulaMurisSenisData') for documentation
#> loading from cache
#> see ?TabulaMurisSenisData and browseVignettes('TabulaMurisSenisData') for documentation
#> loading from cache
#> see ?TabulaMurisSenisData and browseVignettes('TabulaMurisSenisData') for documentation
#> loading from cache
#> see ?TabulaMurisSenisData and browseVignettes('TabulaMurisSenisData') for documentation
#> loading from cache
# The printed result has a `$All` element holding a SingleCellExperiment with
# PCA and UMAP reduced dimensions.
tms_droplet
#> $All
#> class: SingleCellExperiment 
#> dim: 20138 245389 
#> metadata(0):
#> assays(1): counts
#> rownames(20138): Xkr4 Rp1 ... Sly Erdr1
#> rowData names(6): n_cells means ... highly_variable varm
#> colnames(245389): AAACCTGCAGGGTACA-1-0-0-0 AAACCTGCAGTAAGCG-1-0-0-0 ...
#>   10X_P8_15_TTTGTCATCGGCTTGG-1 10X_P8_15_TTTGTCATCTTGTTTG-1
#> colData names(15): age cell ... louvain leiden
#> reducedDimNames(2): PCA UMAP
#> mainExpName: NULL
#> altExpNames(0):

We plot the UMAP of the entire data set, coloring the cells by tissue, to re-create an existing plot of these data (the hyperlink originally attached to "here" is not preserved in this text).

# Named palette: one hex color per droplet tissue, looked up by tissue name
# when the color scale is built.
tissue_cols <- c(
  Pancreas = "#3182bd",
  Thymus = "#31a354",
  Trachea = "#636363",
  Bladder = "#637939",
  Lung = "#7b4173",
  Large_Intestine = "#843c39",
  Fat = "#969696",
  Tongue = "#a1d99b",
  Mammary_Gland = "#ce6dbd",
  Limb_Muscle = "#d6616b",
  Marrow = "#de9ed6",
  Skin = "#e6550d",
  Liver = "#e7969c",
  Heart_and_Aorta = "#e7ba52",
  Kidney = "#e7cb94",
  Spleen = "#fd8d3c"
)

# Pull the combined ("All") object out of the returned list and show it.
se <- tms_droplet[["All"]]
se
#> class: SingleCellExperiment 
#> dim: 20138 245389 
#> metadata(0):
#> assays(1): counts
#> rownames(20138): Xkr4 Rp1 ... Sly Erdr1
#> rowData names(6): n_cells means ... highly_variable varm
#> colnames(245389): AAACCTGCAGGGTACA-1-0-0-0 AAACCTGCAGTAAGCG-1-0-0-0 ...
#>   10X_P8_15_TTTGTCATCGGCTTGG-1 10X_P8_15_TTTGTCATCTTGTTTG-1
#> colData names(15): age cell ... louvain leiden
#> reducedDimNames(2): PCA UMAP
#> mainExpName: NULL
#> altExpNames(0):

# Build the plotting table: UMAP coordinates with the tissue label of each
# cell attached as an extra column.
ds <- as.data.frame(reducedDim(se, "UMAP"))
ds[["tissue"]] <- se$tissue
head(ds)
#>                              UMAP1      UMAP2 tissue
#> AAACCTGCAGGGTACA-1-0-0-0 5.5556602 -10.160711 Tongue
#> AAACCTGCAGTAAGCG-1-0-0-0 2.9584570 -14.145093 Tongue
#> AAACCTGTCATTATCC-1-0-0-0 3.1235533 -14.481063 Tongue
#> AAACGGGGTACAGTGG-1-0-0-0 1.5939721 -14.062417 Tongue
#> AAACGGGGTCTTCTCG-1-0-0-0 0.5233619  -8.997872 Tongue
#> AAAGATGAGCTATGCT-1-0-0-0 1.0210617 -14.642970 Tongue

# Scatter plot of the UMAP, one tiny point per cell colored by tissue; the
# legend keys are enlarged so the colors stay readable.
ggplot(data = ds, mapping = aes(x = UMAP1, y = UMAP2, colour = tissue)) +
  geom_point(size = 0.05) +
  scale_colour_manual(values = tissue_cols) +
  guides(colour = guide_legend(override.aes = list(size = 5))) +
  theme_classic()