## ----install-packages, eval=FALSE--------------------------------------------- # if (!requireNamespace("BiocManager", quietly=TRUE)) { # install.packages("BiocManager") # } # BiocManager::install(c("SummarizedExperiment", "ExperimentHub", "FieldEffectCrc")) ## ----load-hub----------------------------------------------------------------- library(SummarizedExperiment) library(ExperimentHub) ## ----find-data---------------------------------------------------------------- hub = ExperimentHub() ## simply list the resource names ns <- listResources(hub, "FieldEffectCrc") ns ## query the hub for full metadata crcData <- query(hub, "FieldEffectCrc") crcData ## extract metadata df <- mcols(crcData) df ## see where the cache is stored hc <- hubCache(crcData) hc ## ----explore-file------------------------------------------------------------- md <- hub["EH3524"] md ## ----download-data------------------------------------------------------------ ## using resource ID data1 <- hub[["EH3524"]] data1 ## using loadResources() data2 <- loadResources(hub, "FieldEffectCrc", "cohort A") data2 ## ----phenotypes--------------------------------------------------------------- summary(colData(data1)$sampType) ## ----quick-start-1------------------------------------------------------------ se <- data1 counts1 <- assays(se)[["counts"]] ## ----quick-start-2------------------------------------------------------------ counts2 <- assay(se, 2) ## ----quick-start-3------------------------------------------------------------ all(counts1==counts2) ## ----make-txi----------------------------------------------------------------- ## manual construction txi <- as.list(assays(se)) txi$countsFromAbundance <- "no" ## convenience function construction txi <- make_txi(se) ## ----make-dds----------------------------------------------------------------- if (!requireNamespace("DESeq2", quietly=TRUE)) { BiocManager::install("DESeq2") } library(DESeq2) dds <- DESeqDataSetFromTximport(txi, colData(se), ~ sampType) ## ----typical-filter----------------------------------------------------------- countLimit <- 10 sampleLimit <- (1/3) * ncol(dds) keep <- rowSums( counts(dds) > countLimit ) >= sampleLimit dds <- dds[keep, ] ## ----special-filter----------------------------------------------------------- r <- sample(seq_len(nrow(dds)), 6000) c <- sample(seq_len(ncol(dds)), 250) dds <- dds[r, c] r <- rowSums(counts(dds)) >= 10 dds <- dds[r, ] ## ----size-factors------------------------------------------------------------- dds <- DESeq(dds) ## ----prep-input--------------------------------------------------------------- dat <- counts(dds, normalized=TRUE) ## extract the normalized counts mod <- model.matrix(~sampType, data=colData(dds)) ## set the full model mod0 <- model.matrix(~1, data=colData(dds)) ## set the null model ## ----get-nsv------------------------------------------------------------------ if (!requireNamespace("sva", quietly=TRUE)) { BiocManager::install("sva") } library(sva) nsv <- num.sv(dat, mod, method=c("be"), B=20, seed=1) ## Buja Eyuboglu method ## ----get-svs------------------------------------------------------------------ svs <- svaseq(dat, mod, mod0, n.sv=nsv) ## ----add-svs------------------------------------------------------------------ for (i in seq_len(svs$n.sv)) { newvar <- paste0("sv", i) colData(dds)[ , newvar] <- svs$sv[, i] } nvidx <- (ncol(colData(dds)) - i + 1):ncol(colData(dds)) newvars <- colnames(colData(dds))[nvidx] d <- formula( paste0("~", paste(paste(newvars, collapse="+"), "sampType", sep="+")) ) design(dds) <- d ## ----test-de------------------------------------------------------------------ dds <- DESeq(dds) ## ----results------------------------------------------------------------------ cons <- list() m <- combn(levels(colData(dds)$sampType), 2) for (i in seq_len(ncol(m))) { cons[[i]] <- c("sampType", rev(m[, i])) names(cons) <- c( names(cons)[seq_len(length(cons) - 1)], paste(rev(m[, i]), collapse="v") ) } res <- list() for (i in seq_len(length(cons))) { res[[i]] <- results(dds, contrast=cons[[i]], alpha=0.05) ## default alpha is 0.1 } names(res) <- names(cons) ## ----display------------------------------------------------------------------ lapply(res, head) lapply(res, summary) ## ----sessionInfo-------------------------------------------------------------- sessionInfo()