## ----setup, echo=FALSE--------------------------------------------------- library(LearnBioconductor) stopifnot(BiocInstaller::biocVersion() == "3.0") ## ----style, echo = FALSE, results = 'asis'------------------------------- BiocStyle::markdown() knitr::opts_chunk$set(tidy=FALSE) ## ----Biostrings, message=FALSE------------------------------------------- suppressPackageStartupMessages({ library(Biostrings) }) data(phiX174Phage) # sample data, see ?phiX174Phage phiX174Phage m <- consensusMatrix(phiX174Phage)[1:4,] # nucl. x position counts polymorphic <- which(colSums(m != 0) > 1) m[, polymorphic] ## ----showMethods, eval=FALSE--------------------------------------------- # showMethods(class=class(phiX174Phage), where=search()) ## ----eg-GRanges---------------------------------------------------------- ## 'Annotation' package; more later... suppressPackageStartupMessages({ library(TxDb.Hsapiens.UCSC.hg19.knownGene) }) promoters <- promoters(TxDb.Hsapiens.UCSC.hg19.knownGene) ## 'GRanges' with 2 metadata columns promoters head(table(seqnames(promoters))) table(strand(promoters)) seqinfo(promoters) ## vector-like access promoters[ seqnames(promoters) %in% c("chr1", "chr2") ] ## metadata mcols(promoters) length(unique(promoters$tx_name)) ## ----eg-GRangesList------------------------------------------------------ ## exons, grouped by transcript exByTx <- exonsBy(TxDb.Hsapiens.UCSC.hg19.knownGene, "tx", use.names=TRUE) ## list-like subsetting exByTx[1:10] # also logical, character, ... exByTx[["uc001aaa.3"]] # also numeric ## accessors return typed-List, e.g., IntegerList width(exByTx) log10(width(exByTx)) ## 'easy' to ask basic questions, e.g., ... hist(unlist(log10(width(exByTx)))) # widths of exons exByTx[which.max(max(width(exByTx)))] # transcript with largest exon exByTx[which.max(elementLengths(exByTx))] # transcript with most exons ## ----eg-ExpressionSet---------------------------------------------------- suppressPackageStartupMessages({ library(ALL) }) data(ALL) ALL ## 'Phenotype' (sample) and 'feature' data head(pData(ALL)) head(featureNames(ALL)) ## access to pData columns; matrix-like subsetting; exprs() ALL[, ALL$sex %in% "M"] range(exprs(ALL)) ## 30% 'most variable' features (c.f., genefilter::varFilter) iqr <- apply(exprs(ALL), 1, IQR) ALL[iqr > quantile(iqr, 0.7), ] ## ----eg-SummarizedExperiment--------------------------------------------- suppressPackageStartupMessages({ library(airway) }) data(airway) airway ## column and row data colData(airway) rowData(airway) ## access colData; matrix-like subsetting; assay() / assays() airway[, airway$dex %in% "trt"] head(assay(airway)) assays(airway) ## library size colSums(assay(airway)) hist(rowMeans(log10(assay(airway)))) ## ----gc-reference-------------------------------------------------------- library(BSgenome.Hsapiens.UCSC.hg19) chr1seq <- BSgenome.Hsapiens.UCSC.hg19[["chr1"]] chr1alf <- alphabetFrequency(chr1seq) chr1gc <- sum(chr1alf[c("G", "C")]) / sum(chr1alf[c("A", "C", "G", "T")]) ## ----gc-exons-1---------------------------------------------------------- library(TxDb.Hsapiens.UCSC.hg19.knownGene) genes <- genes(TxDb.Hsapiens.UCSC.hg19.knownGene) genes1 <- genes[seqnames(genes) %in% "chr1"] seq1 <- getSeq(BSgenome.Hsapiens.UCSC.hg19, genes1) alf1 <- alphabetFrequency(seq1, collapse=TRUE) gc1 <- sum(alf1[c("G", "C")]) / sum(alf1[c("A", "C", "G", "T")]) ## ----gc-exons-2---------------------------------------------------------- alf2 <- alphabetFrequency(seq1, collapse=FALSE) gc2 <- rowSums(alf2[, c("G", "C")]) / rowSums(alf2[,c("A", "C", "G", "T")]) ## ----gc-denisty---------------------------------------------------------- plot(density(gc2)) abline(v=c(chr1gc, gc1), col=c("red", "blue"), lwd=2) ## ----airway-plot--------------------------------------------------------- iqr <- apply(assay(airway), 1, IQR) airway1 <- airway[iqr > quantile(iqr, 0.7),] plot(density(rowMeans(asinh(assay(airway1))))) ## ----airway-rowttest----------------------------------------------------- suppressPackageStartupMessages({ library(genefilter) }) ttest <- rowttests(asinh(assay(airway1)), airway1$dex) ttest$p.adj <- p.adjust(ttest$p.value, method="BH") ttest[head(order(ttest$p.adj)),] split(assay(airway1)[order(ttest$p.adj)[1], ], airway1$dex) ## ----airway-merge-------------------------------------------------------- mcols(rowData(airway1)) <- ttest head(mcols(airway1))