## ----echo = FALSE, results = 'hide', warning = FALSE, message = FALSE---------
suppressPackageStartupMessages(library(BiocStyle))

## ----load_files1--------------------------------------------------------------
# Run scruff on example dataset
# NOTE: Requires Rsubread index and TxDb objects for the reference genome.
# For generation of these files, please refer to the Stepwise Tutorial.

library(scruff)

# Get the paths to example FASTQ, FASTA, and GTF files.
# Please note that because the following files are included in
# scruff R package, we use system.file() function to extract the paths
# to these files. If the user is running scruff on real data, the
# paths to the input FASTQ, FASTA, and GTF files should be provided,
# and there is no need to call system.file() function. For example,
# v1h1R1 <- "/PATH/TO/vandenBrink_1h1_L001_R1_001.fastq.gz"
# fasta <- "/Path/TO/GRCm38_MT.fa"
v1h1R1 <- system.file("extdata",
    "vandenBrink_1h1_L001_R1_001.fastq.gz",
    package = "scruff")
v1h1R2 <- system.file("extdata",
    "vandenBrink_1h1_L001_R2_001.fastq.gz",
    package = "scruff")
fasta <- system.file("extdata", "GRCm38_MT.fa", package = "scruff")
gtf <- system.file("extdata", "GRCm38_MT.gtf", package = "scruff")

## ----buildindex1, results = 'hide'--------------------------------------------
# Create index files for GRCm38_MT.
# For details, please refer to Rsubread user manual.
# Specify the basename for Rsubread index
indexBase <- "GRCm38_MT"
Rsubread::buildindex(basename = indexBase,
    reference = fasta,
    indexSplit = FALSE)

## ----scruff, eval = FALSE, results = 'hide', warning = FALSE, message = FALSE----
#  data(barcodeExample, package = "scruff")
#  
#  sce <- scruff(project = "example",
#      experiment = c("1h1"),
#      lane = c("L001"),
#      read1Path = c(v1h1R1),
#      read2Path = c(v1h1R2),
#      bc = barcodeExample,
#      index = indexBase,
#      unique = FALSE,
#      nBestLocations = 1,
#      reference = gtf,
#      bcStart = 1,
#      bcStop = 8,
#      bcEdit = 1,
#      umiStart = 9,
#      umiStop = 12,
#      umiEdit = 1,
#      keep = 75,
#      cellPerWell = c(rep(1, 46), 0, 0),
#      cores = 2,
#      verbose = TRUE)

## ----qc1, warning = FALSE, message = FALSE, results = 'hide'------------------
data(sceExample, package = "scruff")
qc <- qcplots(sceExample)

## ----loadfiles2---------------------------------------------------------------
library(scruff)

# Get the paths to example FASTQ files.
# Please note that because the following files are included in
# scruff R package, we use system.file() function to extract the paths
# to these files. If the user is running scruff on real data, the
# paths to the input FASTQ, FASTA, and GTF files should be provided,
# and there is no need to call system.file() function. For example,
# v1h1R1 <- "/PATH/TO/vandenBrink_1h1_L001_R1_001.fastq.gz"
# fasta <- "/Path/TO/GRCm38_MT.fa"
v1h1R1 <- system.file("extdata",
    "vandenBrink_1h1_L001_R1_001.fastq.gz",
    package = "scruff")
v1h1R2 <- system.file("extdata",
    "vandenBrink_1h1_L001_R2_001.fastq.gz",
    package = "scruff")

## ----de, results = 'hide', warning = FALSE, message = FALSE-------------------
data(barcodeExample, package = "scruff")
de <- demultiplex(project = "example",
    experiment = c("1h1"),
    lane = c("L001"),
    read1Path = c(v1h1R1),
    read2Path = c(v1h1R2),
    barcodeExample,
    bcStart = 1,
    bcStop = 8,
    bcEdit = 1,
    umiStart = 9,
    umiStop = 12,
    keep = 75,
    minQual = 10,
    yieldReads = 1e+06,
    verbose = TRUE,
    overwrite = TRUE,
    cores = 2)

## ----al_files, eval = FALSE, results = 'hide'---------------------------------
#  # Create index files for GRCm38_MT. For details, please refer to Rsubread
#  # user manual.
#  fasta <- system.file("extdata", "GRCm38_MT.fa", package = "scruff")
#  
#  # Create index files for GRCm38_MT.
#  # For details, please refer to Rsubread user manual.
#  # Specify the basename for Rsubread index
#  indexBase <- "GRCm38_MT"
#  Rsubread::buildindex(basename = indexBase,
#      reference = fasta,
#      indexSplit = FALSE)

## ----al, eval = FALSE, results = 'hide', warning = FALSE, message = FALSE-----
#  # Align the reads using Rsubread
#  al <- alignRsubread(de,
#      indexBase,
#      unique = FALSE,
#      nBestLocations = 1,
#      format = "BAM",
#      overwrite = TRUE,
#      verbose = TRUE,
#      cores = 2)

## ----co, eval = FALSE, results = 'hide', warning = FALSE, message = FALSE-----
#  gtf <- system.file("extdata", "GRCm38_MT.gtf", package = "scruff")
#  # get the molecular counts of trancsripts for each cell
#  # In experiment 1h1, cell barcodes 95 and 96 are empty well controls.
#  # In experiment b1, cell barcode 95 is bulk sample containing 300 cells.
#  sce <- countUMI(al,
#      gtf,
#      umiEdit = 1,
#      format = "BAM",
#      cellPerWell = c(rep(1, 46), 0, 0),
#      verbose = TRUE,
#      cores = 2)

## ----qc2, warning = FALSE, message = FALSE, echo = TRUE, results = "hide"-----
data(sceExample, package = "scruff")
qc <- qcplots(sceExample)
qc

## ----gtfEG, results = 'hide', warning = FALSE, message = FALSE----------------
# Visualize the reads mapped to gene "mt-Rnr2" in
# cell "vandenBrink_b1_cell_0095".
# bamExample is generated by GenomicAlignments::GAlignments function
data(bamExample, package = "scruff")

## ----visualize_reads----------------------------------------------------------
# gene mt-Rnr2 starts at 1094 and ends at 2675
start <- 1094
end <- 2675

g1 <- rview(bamExample, chr = "MT", start = start, end = end)
g2 <- gview(gtf, chr = "MT", start = start, end = end)
g <- ggbio::tracks(g1, g2, heights = c(4, 1), xlab = "chr MT")
g

## ----10x_bam_qc, warning = FALSE, message = FALSE-----------------------------
# The following example BAM file is the first 5000 BAM file records extracted
# from sample 01 of the 1.3 Million Brain Cells dataset from E18 Mice.
# (https://support.10xgenomics.com/single-cell-gene-expression/datasets/1.3.0/
# 1M_neurons)
# The BAM file for sample 01 is downloaded from here:
# http://sra-download.ncbi.nlm.nih.gov/srapub_files/
# SRR5167880_E18_20160930_Neurons_Sample_01.bam
# see details here:
# https://trace.ncbi.nlm.nih.gov/Traces/sra/?study=SRP096558
# and here:
# https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE93421
bamfile10x <- system.file("extdata",
    "SRR5167880_E18_20160930_Neurons_Sample_01_5000.bam",
    package = "scruff")

# The filtered cell barcodes are generated using the following script:
# library(TENxBrainData)
# library(data.table)
# tenx <- TENxBrainData()
# # get filtered barcodes for sample 01
# filteredBcIndex <- tstrsplit(colData(tenx)[, "Barcode"], "-")[[2]] == 1
# filteredBc <- colData(tenx)[filteredBcIndex, ][["Barcode"]]
filteredBc <- system.file("extdata",
    "SRR5167880_E18_20160930_Neurons_Sample_01_filtered_barcode.tsv",
    package = "scruff")
# QC results are saved to current working directory
sce <- tenxBamqc(bam = bamfile10x,
    experiment = "Neurons_Sample_01",
    filter = filteredBc)
sce

## ----bamqc1, warning = FALSE, message = FALSE, echo = TRUE, results = "hide"----
g <- qcplots(sce)
g

## ----session, echo = FALSE----------------------------------------------------
sessionInfo()