## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"-------------------- BiocStyle::latex() ## ----parallel, echo=FALSE, cache=FALSE, results="hide"--------------------- library(BiocParallel) # for speed we run everything in serial mode register(SerialParam()) ## ----knitr, echo=FALSE, cache=FALSE, results="hide"------------------------ library(knitr) opts_chunk$set( tidy=FALSE, dev="png", fig.width=7, fig.height=7, dpi=300, message=FALSE, warning=FALSE, cache=FALSE ) ## ----include=FALSE--------------------------------------------------------- opts_chunk$set(concordance=TRUE) ## ----loadFraser, echo=FALSE------------------------------------------------ suppressPackageStartupMessages({ library(FRASER) }) ## ----quick_fraser_guide, echo=TRUE----------------------------------------- # load FRASER library library(FRASER) # count data fds <- createTestFraserSettings() fds <- countRNAData(fds) fds # compute stats fds <- calculatePSIValues(fds) # filtering junction with low expression fds <- filterExpressionAndVariability(fds, minExpressionInOneSample=20, minDeltaPsi=0.0, filter=TRUE) # fit the splicing model for each metric # with a specific latentsapce dimension fds <- FRASER(fds, q=c(psi5=2, psi3=3, theta=3)) # we provide two ways to anntoate introns with the corresponding gene symbols: # the first way uses TxDb-objects provided by the user as shown here library(TxDb.Hsapiens.UCSC.hg19.knownGene) library(org.Hs.eg.db) txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene orgDb <- org.Hs.eg.db fds <- annotateRangesWithTxDb(fds, txdb=txdb, orgDb=orgDb) # alternatively, we also provide a way to use biomart for the annotation: # fds <- annotateRanges(fds) # get results: we recommend to use an FDR cutoff 0.05, but due to the small # dataset size we extract all events and their associated values # eg: res <- results(fds, zScoreCutoff=NA, padjCutoff=0.05, deltaPsiCutoff=0.3) res <- results(fds, zScoreCutoff=NA, padjCutoff=NA, deltaPsiCutoff=NA) res # result visualization plotVolcano(fds, sampleID="sample1", type="psi5", aggregate=TRUE) ## ----sampleData Table, echo=TRUE------------------------------------------- sampleTable <- fread(system.file( "extdata", "sampleTable.tsv", package="FRASER", mustWork=TRUE)) head(sampleTable) ## ----FRASER setting example1, echo=TRUE------------------------------------ # convert it to a bamFile list bamFiles <- system.file(sampleTable[,bamFile], package="FRASER", mustWork=TRUE) sampleTable[,bamFile:=bamFiles] # create FRASER object settings <- FraserDataSet(colData=sampleTable, workingDir="FRASER_output") # show the FraserSettings object settings ## ----FRASER setting example2, echo=TRUE------------------------------------ settings <- createTestFraserSettings() settings ## ----parallelize example, eval=FALSE--------------------------------------- # # example of how to use parallelization: use 10 cores or the maximal number of # # available cores if fewer than 10 are available and use Snow if on Windows # if(.Platform$OS.type == "unix") { # register(MulticoreParam(workers=min(10, multicoreWorkers()))) # } else { # register(SnowParam(workers=min(10, multicoreWorkers()))) # } ## ----counting reads, echo=TRUE--------------------------------------------- # count reads fds <- countRNAData(settings) fds ## ----create fds with counts, echo=TRUE------------------------------------- # example sample annoation for precalculated count matrices sampleTable <- fread(system.file("extdata", "sampleTable_countTable.tsv", package="FRASER", mustWork=TRUE)) head(sampleTable) # get raw counts junctionCts <- fread(system.file("extdata", "raw_junction_counts.tsv.gz", package="FRASER", mustWork=TRUE)) head(junctionCts) spliceSiteCts <- fread(system.file("extdata", "raw_site_counts.tsv.gz", package="FRASER", mustWork=TRUE)) head(spliceSiteCts) # create FRASER object fds <- FraserDataSet(colData=sampleTable, junctions=junctionCts, spliceSites=spliceSiteCts, workingDir="FRASER_output") fds ## ----calculate psi/zscore values, echo=TRUE-------------------------------- fds <- calculatePSIValues(fds) fds ## ----filter_junctions, echo=TRUE------------------------------------------- fds <- filterExpressionAndVariability(fds, minDeltaPsi=0.0, filter=FALSE) plotFilterExpression(fds, bins=100) ## ----subset to filtered junctions, echo=TRUE------------------------------- fds_filtered <- fds[mcols(fds, type="j")[,"passed"]] fds_filtered # filtered_fds not further used for this tutorial because the example dataset # is otherwise too small ## ----sample_covariation, echo=TRUE----------------------------------------- # Heatmap of the sample correlation plotCountCorHeatmap(fds, type="psi5", logit=TRUE, normalized=FALSE) ## ----intron_sample_correlation, echo=TRUE, eval=FALSE---------------------- # # Heatmap of the intron/sample expression # plotCountCorHeatmap(fds, type="psi5", logit=TRUE, normalized=FALSE, # plotType="junctionSample", topJ=100, minDeltaPsi = 0.01) ## ----model fitting, echo=TRUE---------------------------------------------- # This is computational heavy on real size datasets and can take awhile fds <- FRASER(fds, q=c(psi5=3, psi3=5, theta=2)) ## ----covariation_after_fitting, echo=TRUE---------------------------------- plotCountCorHeatmap(fds, type="psi5", normalized=TRUE, logit=TRUE) ## ----result table, echo=TRUE----------------------------------------------- # annotate introns with the HGNC symbols of the corresponding gene library(TxDb.Hsapiens.UCSC.hg19.knownGene) library(org.Hs.eg.db) txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene orgDb <- org.Hs.eg.db fds <- annotateRangesWithTxDb(fds, txdb=txdb, orgDb=orgDb) # fds <- annotateRanges(fds) # alternative way using biomaRt # retrieve results with default and recommended cutoffs (padj <= 0.05 and # |deltaPsi| >= 0.3) res <- results(fds) ## ----result_table, echo=TRUE----------------------------------------------- # to show result visualization functions for this tuturial, zScore cutoff used res <- results(fds, zScoreCutoff=2, padjCutoff=NA, deltaPsiCutoff=0.1) res ## ----finding_candidates, echo=TRUE----------------------------------------- plotVolcano(fds, type="psi5", "sample10") ## ----sample result, echo=TRUE---------------------------------------------- sampleRes <- res[res$sampleID == "sample10"] sampleRes ## ----plot_expression, echo=TRUE, eval=FALSE-------------------------------- # plotExpression(fds, type="psi5", result=sampleRes[1]) # plotExpectedVsObservedPsi(fds, result=sampleRes[1]) ## ----save_load, echo=TRUE-------------------------------------------------- # saving a fds workingDir(fds) <- "FRASER_output" name(fds) <- "ExampleAnalysis" saveFraserDataSet(fds, dir=workingDir(fds), name=name(fds)) # two ways of loading a fds by either specifying the directory and anaysis name # or directly giving the path the to fds-object.RDS file fds <- loadFraserDataSet(dir=workingDir(fds), name=name(fds)) fds <- loadFraserDataSet(file=file.path(workingDir(fds), "savedObjects", "ExampleAnalysis", "fds-object.RDS")) ## ----control confounders, echo=TRUE---------------------------------------- # Using an alternative way to correct splicing ratios # here: only 2 iteration to speed the calculation up # for the vignette, the default is 15 iterations fds <- fit(fds, q=3, type="psi5", implementation="PCA-BB-Decoder", iterations=2) ## ----findBestQ, echo=TRUE-------------------------------------------------- set.seed(42) # hyperparameter opimization fds <- optimHyperParams(fds, type="psi5", plot=FALSE) # retrieve the estimated optimal dimension of the latent space bestQ(fds, type="psi5") ## ----figure_findBestQ, echo=TRUE------------------------------------------- plotEncDimSearch(fds, type="psi5") ## ----p-value calculation, echo=TRUE---------------------------------------- fds <- calculatePvalues(fds, type="psi5") head(pVals(fds, type="psi5")) ## ----p-adj calculation, echo=TRUE------------------------------------------ fds <- calculatePadjValues(fds, type="psi5", method="BY") head(padjVals(fds,type="psi5")) ## ----z-score calculation, echo=TRUE---------------------------------------- fds <- calculateZscore(fds, type="psi5") head(zScores(fds, type="psi5")) ## ----result_visualization, echo=TRUE--------------------------------------- plotAberrantPerSample(fds) # qq-plot for single junction plotQQ(fds, result=res[1]) # global qq-plot (on gene level since aggregate=TRUE) plotQQ(fds, aggregate=TRUE, global=TRUE) ## ----sessionInfo, echo=FALSE----------------------------------------------- sessionInfo()