## ----setup, echo=FALSE---------------------------------------------------
knitr::opts_chunk$set(cache = TRUE)

## ----phiX----------------------------------------------------------------
library(Biostrings)
data(phiX174Phage)

## ----consensusMatrix-----------------------------------------------------
## Keep only the first four rows of the consensus matrix (the A, C, G, T
## counts per alignment column), then find columns in which more than one
## of those rows is non-zero, i.e., positions that differ between sequences.
m <- consensusMatrix(phiX174Phage)[1:4, ]
polymorphic <- which(colSums(m != 0) > 1)
## Extract the single letter at each polymorphic position from every sequence
mapply(substr, polymorphic, polymorphic, MoreArgs = list(x = phiX174Phage))

## ----ranges, message=FALSE-----------------------------------------------
library(GenomicRanges)
gr <- GRanges("A", IRanges(c(10, 20, 22), width = 5), "+")
shift(gr, 1)            # intra-range; note 1-based coordinates!
range(gr)               # inter-range
reduce(gr)              # inter-range
coverage(gr)
setdiff(range(gr), gr)  # 'introns'

## ----BSgenome-require, message=FALSE-------------------------------------
library(BSgenome.Hsapiens.UCSC.hg19)
## A single range spanning all of chr14
chr14_range <- GRanges("chr14", IRanges(1, seqlengths(Hsapiens)["chr14"]))
chr14_dna <- getSeq(Hsapiens, chr14_range)
letterFrequency(chr14_dna, "GC", as.prob = TRUE)

## ----bam-require---------------------------------------------------------
library(GenomicRanges)
library(GenomicAlignments)
library(Rsamtools)

## our 'region of interest'
roi <- GRanges("chr14", IRanges(19653773, width = 1))

## sample data
library(RNAseqData.HNRNPC.bam.chr14)
bf <- BamFile(RNAseqData.HNRNPC.bam.chr14_BAMFILES[[1]], asMates = TRUE)

## alignments, junctions, overlapping our roi
paln <- readGAlignmentsList(bf)
j <- summarizeJunctions(paln, with.revmap = TRUE)
j_overlap <- j[j %over% roi]

## supporting reads
## (revmap maps each junction back to the elements of 'paln' that support it)
paln[j_overlap$revmap[[1]]]

## ----vcf, message=FALSE--------------------------------------------------
## input variants
library(VariantAnnotation)
fl <- system.file("extdata", "chr22.vcf.gz", package = "VariantAnnotation")
vcf <- readVcf(fl, "hg19")
seqlevels(vcf) <- "chr22"

## known gene model
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
## rowRanges() (not rowData()) returns the variant positions as ranges,
## which is what locateVariants() needs as its query.
coding <- locateVariants(
  rowRanges(vcf),
  TxDb.Hsapiens.UCSC.hg19.knownGene,
  CodingVariants()
)
head(coding)
## ----summarizeOverlaps-roi, message=FALSE--------------------------------
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
exByGn <- exonsBy(TxDb.Hsapiens.UCSC.hg19.knownGene, "gene")
## only chromosome 14
## pruning.mode = "coarse" drops every gene (list element) that has a range
## on a removed sequence; it replaces the former 'force = TRUE' argument.
seqlevels(exByGn, pruning.mode = "coarse") <- "chr14"

## ----summarizeOverlaps-bam, message=FALSE--------------------------------
library(RNAseqData.HNRNPC.bam.chr14)
length(RNAseqData.HNRNPC.bam.chr14_BAMFILES)

## ----summarizeOverlaps---------------------------------------------------
## next 2 lines optional; non-Windows
library(BiocParallel)
## detectCores() is namespace-qualified so it resolves even when the
## 'parallel' package is not attached to the search path.
register(MulticoreParam(workers = parallel::detectCores()))
## Count reads overlapping the exons of each gene, one column per BAM file
olaps <- summarizeOverlaps(exByGn, RNAseqData.HNRNPC.bam.chr14_BAMFILES)

## ----summarizeOverlaps-explore-------------------------------------------
olaps
head(assay(olaps))
colSums(assay(olaps))  # library sizes
## total exon width per gene vs. mean count per gene
plot(sum(width(olaps)), rowMeans(assay(olaps)), log = "xy")

## ----summarizeOverlaps-gc------------------------------------------------
library(BSgenome.Hsapiens.UCSC.hg19)
## rowRanges() (not rowData()) supplies the per-gene exon ranges to getSeq()
sequences <- getSeq(BSgenome.Hsapiens.UCSC.hg19, rowRanges(olaps))
## G+C count for every exon, computed on the flattened sequences ...
gcPerExon <- letterFrequency(unlist(sequences), "GC")
## ... then regrouped so each list element again corresponds to one gene
gc <- relist(as.vector(gcPerExon), sequences)
## per-gene G+C count divided by per-gene total exon width (a proportion)
gc_percent <- sum(gc) / sum(width(olaps))
plot(gc_percent, rowMeans(assay(olaps)), log = "y")