1 Basic genomic regions exploration

Project: Example bumphunter.

2 Introduction

This report is meant to help explore a set of genomic regions and was generated using the regionReport (Collado-Torres, Jaffe, and Leek, 2016) package. While the report is rich, it is meant to just start the exploration of the results and exemplify some of the code used to do so. If you need a more in-depth analysis for your specific data set you might want to use the customCode argument.

Most plots were made with using ggplot2 (Wickham, 2009).

2.1 Code setup

## knitrBoostrap and device chunk options
load_install('knitr')
opts_chunk$set(bootstrap.show.code = FALSE, dev = device)
if(!outputIsHTML) opts_chunk$set(bootstrap.show.code = FALSE, dev = device, echo = FALSE)
#### Libraries needed

## Bioconductor
load_install('bumphunter')
load_install('derfinder')
load_install('derfinderPlot')
load_install('GenomeInfoDb')
load_install('GenomicRanges')
load_install('ggbio')

## Transcription database to use by default
if(is.null(txdb)) {
    load_install('TxDb.Hsapiens.UCSC.hg19.knownGene')
    txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene::TxDb.Hsapiens.UCSC.hg19.knownGene
}

## CRAN
load_install('ggplot2')
if(!is.null(theme)) theme_set(theme)
load_install('grid')
load_install('gridExtra')
load_install('knitr')
load_install('RColorBrewer')
load_install('mgcv')
load_install('whisker')
load_install('DT')
load_install('devtools')

## Working behind the scenes
# load_install('knitcitations')
# load_install('rmarkdown')
## Optionally
# load_install('knitrBootstrap')

#### Code setup

## For ggplot
tmp <- regions
names(tmp) <- seq_len(length(tmp))
regions.df <- as.data.frame(tmp)
regions.df$width <- width(tmp)
rm(tmp)

## Special subsets: need at least 3 points for a density plot
keepChr <- table(regions.df$seqnames) > 2
regions.df.plot <- subset(regions.df, seqnames %in% names(keepChr[keepChr]))

if(hasSignificant) {
    ## Keep only those sig
    regions.df.sig <- regions.df[significantVar, ]
    keepChr <- table(regions.df.sig$seqnames) > 2
    regions.df.sig <- subset(regions.df.sig, seqnames %in% names(keepChr[keepChr]))
}

## Find which chrs are present in the data set
chrs <- levels(seqnames(regions))

## areaVar initialize
areaVar <- NULL

3 Quality checks

3.1 Region width

p2a <- ggplot(regions.df.plot, aes(x=log10(width), colour=seqnames)) +
    geom_line(stat='density') + labs(title='Density of region lengths') +
    xlab('Region width (log10)') + scale_colour_discrete(limits=chrs) +
    theme(legend.title=element_blank())
p2a