## ----setup, echo=FALSE---------------------------------------------------
library(LearnBioconductor)
stopifnot(BiocInstaller::biocVersion() == "3.0")

## ----style, echo = FALSE, results = 'asis'-------------------------------
BiocStyle::markdown()
knitr::opts_chunk$set(tidy=FALSE)

## ----BSgenome-require, message=FALSE-------------------------------------
suppressPackageStartupMessages({  
    library(BSgenome.Hsapiens.UCSC.hg19)
})
chr14_range = GRanges("chr14", IRanges(1, seqlengths(Hsapiens)["chr14"]))
chr14_dna <- getSeq(Hsapiens, chr14_range)
letterFrequency(chr14_dna, "GC", as.prob=TRUE)

## ----ranges, message=FALSE-----------------------------------------------
suppressPackageStartupMessages({
    library(GenomicRanges)
})
gr <- GRanges("A", IRanges(c(10, 20, 22), width=5), "+")
shift(gr, 1)                            # 1-based coordinates!
range(gr)                               # intra-range
reduce(gr)                              # inter-range
coverage(gr)
setdiff(range(gr), gr)                  # 'introns'

## ----bam-require---------------------------------------------------------
suppressPackageStartupMessages({
    library(GenomicRanges)
    library(GenomicAlignments)
    library(Rsamtools)
})

## our 'region of interest'
roi <- GRanges("chr14", IRanges(19653773, width=1)) 
## sample data
suppressPackageStartupMessages({
    library('RNAseqData.HNRNPC.bam.chr14')
})
bf <- BamFile(RNAseqData.HNRNPC.bam.chr14_BAMFILES[[1]], asMates=TRUE)
## alignments, junctions, overlapping our roi
paln <- readGAlignmentsList(bf)
j <- summarizeJunctions(paln, with.revmap=TRUE)
j_overlap <- j[j %over% roi]

## supporting reads
paln[j_overlap$revmap[[1]]]

## ----vcf, message=FALSE--------------------------------------------------
## input variants
suppressPackageStartupMessages({
    library(VariantAnnotation)
})
fl <- system.file("extdata", "chr22.vcf.gz", package="VariantAnnotation")
vcf <- readVcf(fl, "hg19")
seqlevels(vcf) <- "chr22"
## known gene model
suppressPackageStartupMessages({
   library(TxDb.Hsapiens.UCSC.hg19.knownGene)
})
coding <- locateVariants(rowData(vcf),
    TxDb.Hsapiens.UCSC.hg19.knownGene,
    CodingVariants())
head(coding)

## ----require-------------------------------------------------------------
suppressPackageStartupMessages({
    library(GenomicRanges)
})

## ----help, eval=FALSE----------------------------------------------------
#    help(package="GenomicRanges")
#    vignette(package="GenomicRanges")
#    vignette(package="GenomicRanges", "GenomicRangesHOWTOs")
#    ?GRanges

## ----ShortRead, messages=FALSE-------------------------------------------
## 1. attach ShortRead and BiocParallel
suppressPackageStartupMessages({
    library(ShortRead)
    library(BiocParallel)
})

## 2. create a vector of file paths
fls <- dir("~/fastq", pattern="*fastq", full=TRUE)


## ----fakestats, eval=FALSE-----------------------------------------------
#  ## 3. collect statistics
#  stats0 <- qa(fls)

## ----realstats, echo=FALSE, results="hide"-------------------------------
data(stats0)

## ----browseStats---------------------------------------------------------
## 4. generate and browse the report
if (interactive())
    browseURL(report(stats0))

## ----ShortRead-qa-all----------------------------------------------------
data(qa_all)
if (interactive())
    browseURL(report(qa_all))

## ----setup-view, message=FALSE, warning=FALSE----------------------------
## 1.a 'Annotation' packages
suppressPackageStartupMessages({
    library(TxDb.Hsapiens.UCSC.hg19.knownGene)
    library(org.Hs.eg.db)
})

## 1.b -- map 'seqlevels' as recorded in the TxDb file to those in the
## BAM file
fl <- "~/igv/genomes/hg19_alias.tab"
map <- with(read.delim(fl, header=FALSE, stringsAsFactors=FALSE),
    setNames(V1, V2))
seqlevels(TxDb.Hsapiens.UCSC.hg19.knownGene, force=TRUE) <- map

## 2. Symbol -> Entrez ID -> Gene coordinates
sym2eg <- select(org.Hs.eg.db, "SPARCL1", "ENTREZID", "SYMBOL")
exByGn <- exonsBy(TxDb.Hsapiens.UCSC.hg19.knownGene, "gene")
sparcl1exons <- exByGn[[sym2eg$ENTREZID]]

## 3. Aligned reads
suppressPackageStartupMessages({
    library(GenomicAlignments)
})

fl <- "~/bam/SRR1039508_sorted.bam"
sparcl1gene <- range(sparcl1exons)
param <- ScanBamParam(which=sparcl1gene)
aln <- readGAlignmentPairs(fl, param=param)

## ----compatibleAlignments, warning=FALSE---------------------------------
## 5.a. exons-by-transcript for our gene of interest
txids <- select(TxDb.Hsapiens.UCSC.hg19.knownGene, sym2eg$ENTREZID,
    "TXID", "GENEID")$TXID
exByTx <- exonsBy(TxDb.Hsapiens.UCSC.hg19.knownGene, "tx")[txids]

## 5.b compatible alignments
hits <- findCompatibleOverlaps(query=aln, subject=exByTx)
good <- seq_along(aln) %in% queryHits(hits)
table(good)

## ----coding-sequence, warning=FALSE--------------------------------------
## reset seqlevels
restoreSeqlevels(TxDb.Hsapiens.UCSC.hg19.knownGene)

## a. cds coordinates, grouped by transcript
txids <- select(TxDb.Hsapiens.UCSC.hg19.knownGene, sym2eg$ENTREZID,
    "TXID", "GENEID")$TXID
cdsByTx <- cdsBy(TxDb.Hsapiens.UCSC.hg19.knownGene, "tx")[txids]

## b. coding sequence from relevant reference genome
suppressPackageStartupMessages({
    library(BSgenome.Hsapiens.UCSC.hg19)
})

dna <- extractTranscriptSeqs(BSgenome.Hsapiens.UCSC.hg19, cdsByTx)
protein <- translate(dna)

## ----GenomicRanges-howtos, eval=FALSE------------------------------------
#  browseVignettes("GenomicRanges")

