## ----setup, echo=FALSE--------------------------------------------------- library(LearnBioconductor) stopifnot(BiocInstaller::biocVersion() == "3.0") ## ----style, echo = FALSE, results = 'asis'------------------------------- BiocStyle::markdown() ## ----benchmark----------------------------------------------------------- f0 <- function(n) { ## inefficient! ans <- numeric() for (i in seq_len(n)) ans <- c(ans, exp(i)) ans } f1 <- function(n) { ans <- numeric(n) for (i in seq_len(n)) ans[[i]] <- exp(i) ans } f2 <- function(n) sapply(seq_len(n), exp) f3 <- function(n) exp(seq_len(n)) ## ----parallel-sleep------------------------------------------------------ library(BiocParallel) fun <- function(i) { Sys.sleep(1) i } ## serial f0 <- function(n) lapply(seq_len(n), fun) ## parallel f1 <- function(n) bplapply(seq_len(n), fun) ## ----count-overlaps-roi, eval=FALSE-------------------------------------- # library(TxDb.Hsapiens.UCSC.hg19.knownGene) # exByTx <- exonsBy(TxDb.Hsapiens.UCSC.hg19.knownGene, "tx") # # map0 <- read.delim("~/igv/genomes/hg19_alias.tab", header=FALSE, # stringsAsFactors=FALSE) # map <- setNames(map0$V1, map0$V2) # seqlevels(exByTx, force=TRUE) <- map ## ----count-overlaps, eval=FALSE------------------------------------------ # count1 <- function(filename, roi) { # ## Create and open BAM file # bf <- BamFile(filename, yieldSize=1000000) # open(bf) # # ## initialize variables # n <- 0 # number of reads examined # count <- integer(length(roi)) # running count of reads overlapping roi # names(counts) <- names(roi) # # ## read in and count chunks of data, until done # repeat { # ## input # aln <- readGAlignments(bf) # input next chunk # if (length(aln) == 0) # stopping condition # break # n <- n + length(aln) # how are we doing? # message(n) # # ## overlaps # olaps <- findOverlaps(aln, roi, type="within", ignore.strand=TRUE) # count <- count + tabulate(subjectHits(olaps), subjectLength(olaps)) # } # # ## finish and return result # close(bf) # count # } ## ----count-overlaps-doit, eval=FALSE------------------------------------- # filename <- "~/bam/SRR1039508_sorted.bam" # count <- count1(filename, exByTx) ## ----count-overlaps-parallel, eval=FALSE--------------------------------- # library(BiocParallel) # # ## all bam files # filenames <- dir("~/bam", pattern="bam$", full=TRUE) # names(filenames) <- sub("_sorted.bam", "", basename(filenames)) # # ## iterate # counts <- bplapply(filenames, count1, exByTx) # counts <- simplify2array(counts) # head(counts)