## ----style, echo = FALSE, results = 'asis'-------------------------------------------------------- BiocStyle::markdown() options(width=100, max.print=1000) knitr::opts_chunk$set( eval=as.logical(Sys.getenv("KNITR_EVAL", "TRUE")), cache=as.logical(Sys.getenv("KNITR_CACHE", "TRUE"))) ## ----setup, echo=FALSE, messages=FALSE, warnings=FALSE-------------------------------------------- suppressPackageStartupMessages({ library(AnnotationDbi) library(AnnotationHub) library(GenomicFeatures) library(biomaRt) library(org.Hs.eg.db) library(TxDb.Hsapiens.UCSC.hg19.knownGene) }) ## ----configure-test------------------------------------------------------------------------------- stopifnot( getRversion() >= '3.2' && getRversion() < '3.3', BiocInstaller::biocVersion() == "3.2" ) ## ----gene-model-discovery------------------------------------------------------------------------- library(TxDb.Hsapiens.UCSC.hg19.knownGene) txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene txdb methods(class=class(txdb)) ## ----txdb-exons----------------------------------------------------------------------------------- exons(txdb) exonsBy(txdb, "tx") ## ----bsgenome------------------------------------------------------------------------------------- library(BSgenome.Hsapiens.UCSC.hg19) genome <- BSgenome.Hsapiens.UCSC.hg19 getSeq(genome, exons(txdb)[1:100]) ## ----org------------------------------------------------------------------------------------------ library(org.Hs.eg.db) org.Hs.eg.db ## ----select--------------------------------------------------------------------------------------- select(org.Hs.eg.db, c("BRCA1", "PTEN"), c("ENTREZID", "GENENAME"), "SYMBOL") keytypes(org.Hs.eg.db) columns(org.Hs.eg.db) ## ----organismdb----------------------------------------------------------------------------------- library(Homo.sapiens) select(Homo.sapiens, c("BRCA1", "PTEN"), c("TXNAME", "TXCHROM", "TXSTART", "TXEND"), "SYMBOL") ## ----biomart, eval=FALSE-------------------------------------------------------------------------- # ## NEEDS INTERNET ACCESS !! # library(biomaRt) # head(listMarts(), 3) ## list marts # head(listDatasets(useMart("ensembl")), 3) ## mart datasets # ensembl <- ## fully specified mart # useMart("ensembl", dataset = "hsapiens_gene_ensembl") # # head(listFilters(ensembl), 3) ## filters # myFilter <- "chromosome_name" # substr(filterOptions(myFilter, ensembl), 1, 50) ## return values # myValues <- c("21", "22") # head(listAttributes(ensembl), 3) ## attributes # myAttributes <- c("ensembl_gene_id","chromosome_name") # # ## assemble and query the mart # res <- getBM(attributes = myAttributes, filters = myFilter, # values = myValues, mart = ensembl) ## ----annotationhub-gtf, eval=FALSE---------------------------------------------------------------- # library(AnnotationHub) # hub <- AnnotationHub() # hub # query(hub, c("Ensembl", "80", "gtf")) # ## ensgtf = display(hub) # visual choice # hub["AH47107"] # gtf <- hub[["AH47107"]] # gtf # txdb <- GenomicFeatures::makeTxDbFromGRanges(gtf) ## ----annotationhub-orgdb, eval=FALSE-------------------------------------------------------------- # library(AnnotationHub) # hub <- AnnotationHub() # query(hub, "OrgDb") ## ----annotationhub-roadmap, eval=FALSE------------------------------------------------------------ # library(AnnotationHub) # hub <- AnnotationHub() # query(hub , c("EpigenomeRoadMap", "E126", "H3K4ME2")) # E126 <- hub[["AH29817"]] ## ----annotationhub-liftover, eval=FALSE----------------------------------------------------------- # query(hub , c("hg19", "hg38", "chainfile")) # chain <- hub[["AH14150"]] ## ----liftover, eval=FALSE------------------------------------------------------------------------- # library(rtracklayer) # E126hg38 <- liftOver(E126, chain) # E126hg38 ## ----vcf, message=FALSE--------------------------------------------------------------------------- ## input variants library(VariantAnnotation) fl <- system.file("extdata", "chr22.vcf.gz", package="VariantAnnotation") vcf <- readVcf(fl, "hg19") seqlevels(vcf) <- "chr22" ## known gene model library(TxDb.Hsapiens.UCSC.hg19.knownGene) coding <- locateVariants(rowRanges(vcf), TxDb.Hsapiens.UCSC.hg19.knownGene, CodingVariants()) head(coding) ## ----sessionInfo---------------------------------------------------------------------------------- sessionInfo()