## ----eval=TRUE,echo=FALSE,out.width="100%",fig.cap="Predictor design."-------- knitr::opts_chunk$set(crop=NULL) knitr::include_graphics("vignette_psn.png") ## ----eval=TRUE,echo=FALSE,out.width="80%",fig.cap="Workflow."----------------- knitr::include_graphics("vignette_workflow.png") ## ----eval=TRUE---------------------------------------------------------------- suppressWarnings(suppressMessages(require(netDx))) ## ----eval=TRUE---------------------------------------------------------------- suppressMessages(library(curatedTCGAData)) ## ----eval=TRUE---------------------------------------------------------------- curatedTCGAData(diseaseCode="BRCA", assays="*",dry.run=TRUE,version="1.1.38") ## ----eval=TRUE---------------------------------------------------------------- brca <- suppressMessages( curatedTCGAData("BRCA", c("mRNAArray","miRNA*","Methylation_methyl27*"), dry.run=FALSE,version="1.1.38")) ## ----class.source="codeblock",eval=TRUE--------------------------------------- brca ## ----class.source="codeblock",eval=TRUE--------------------------------------- summary(assays(brca)) ## ----class.source="codeblock",eval=TRUE--------------------------------------- names(assays(brca)) ## ----class.source="codeblock",eval=TRUE--------------------------------------- mir <- assays(brca)[["BRCA_miRNASeqGene-20160128"]] head(mir[,1:5]) ## ----class.source="codeblock",eval=TRUE--------------------------------------- pheno <- colData(brca) colnames(pheno)[1:20] head(pheno[,1:5]) ## ----eval=TRUE---------------------------------------------------------------- source("prepare_data.R") brca <- prepareData(brca,setBinary=TRUE) ## ----eval=TRUE---------------------------------------------------------------- pID <- colData(brca)$patientID colData(brca)$ID <- pID ## ----eval=TRUE---------------------------------------------------------------- set.seed(123) dsets <- subsampleValidationData(brca,pctValidation=0.1) brca <- dsets$trainMAE holdout <- dsets$validationMAE ## ----eval=TRUE---------------------------------------------------------------- groupList <- list() # genes in mRNA data are grouped by pathways pathFile <- sprintf("%s/extdata/pathway_ex3.gmt", path.package("netDx")) pathList <- suppressMessages(readPathways(pathFile)) groupList[["BRCA_mRNAArray-20160128"]] <- pathList ## ----class.source="codeblock",eval=TRUE--------------------------------------- summary(groupList) ## ----class.source="codeblock",eval=TRUE--------------------------------------- names(groupList[["BRCA_mRNAArray-20160128"]]) ## ----eval=TRUE---------------------------------------------------------------- length(groupList[["BRCA_mRNAArray-20160128"]][[1]]) ## ----eval=TRUE---------------------------------------------------------------- head(groupList[["BRCA_mRNAArray-20160128"]][[1]]) ## ----eval=TRUE---------------------------------------------------------------- groupList[["clinical"]] <- list( age="patient.age_at_initial_pathologic_diagnosis", stage="STAGE" ) ## ----eval=TRUE---------------------------------------------------------------- tmp <- list(rownames(experiments(brca)[[1]])); names(tmp) <- names(brca)[1] groupList[[names(brca)[[1]]]] <- tmp tmp <- list(rownames(experiments(brca)[[2]])); names(tmp) <- names(brca)[2] groupList[[names(brca)[2]]] <- tmp tmp <- list(rownames(experiments(brca)[[3]])); names(tmp) <- names(brca)[3] groupList[[names(brca)[3]]] <- tmp ## ----eval=TRUE---------------------------------------------------------------- sims <- list( a="pearsonCorr", b="normDiff", c="pearsonCorr", d="pearsonCorr" ) # map layer names to sims names(sims) <- names(groupList) ## ----eval=TRUE---------------------------------------------------------------- nco <- round(parallel::detectCores()*0.75) # use 75% available cores message(sprintf("Using %i of %i cores", nco, parallel::detectCores())) outDir <- paste(tempdir(),"pred_output",sep=getFileSep()) # use absolute path if (file.exists(outDir)) unlink(outDir,recursive=TRUE) numSplits <- 2L ## ----eval=TRUE---------------------------------------------------------------- t0 <- Sys.time() model <- suppressMessages( buildPredictor( dataList=brca, ## your data groupList=groupList, ## grouping strategy sims=sims, outDir=outDir, ## output directory trainProp=0.8, ## pct of samples to use to train model in ## each split numSplits=2L, ## number of train/test splits featSelCutoff=1L, ## threshold for calling something ## feature-selected featScoreMax=2L, ## max score for feature selection numCores=nco, ## set higher for parallelizing debugMode=FALSE, keepAllData=FALSE, ## set to TRUE for debugging logging="none" ## set to "default" for messages )) t1 <- Sys.time() print(t1-t0) ## ----eval=TRUE---------------------------------------------------------------- results <- getResults( model, unique(colData(brca)$STATUS), featureSelCutoff=2L, featureSelPct=0.50 ) ## ----class.source="codeblock",eval=TRUE--------------------------------------- summary(results) ## ----class.source="codeblock",eval=TRUE--------------------------------------- results$performance ## ----class.source="codeblock", eval=TRUE-------------------------------------- results$featureScores ## ----class.source="codeblock",eval=TRUE--------------------------------------- confMat <- confusionMatrix(model) ## ----class.source="codeblock",eval=TRUE--------------------------------------- results$selectedFeatures ## ----eval=TRUE---------------------------------------------------------------- outDir <- paste(tempdir(), randAlphanumString(), sep = getFileSep()) if (file.exists(outDir)) unlink(outDir,recursive=TRUE) dir.create(outDir) predModel <- suppressMessages( predict(trainMAE=brca, testMAE=holdout, groupList=groupList, selectedFeatures=results$selectedFeatures, sims=sims, outDir=outDir, verbose = FALSE) ) ## ----eval=TRUE---------------------------------------------------------------- perf <- getPerformance(predModel, unique(colData(brca)$STATUS)) summary(perf) ## ----eval=TRUE---------------------------------------------------------------- plotPerf_multi(list(perf$rocCurve), plotTitle = sprintf( "BRCA Validation: %i samples", nrow(colData(holdout)))) plotPerf_multi(list(perf$prCurve), plotType = "PR", plotTitle = sprintf( "BRCA Validation: %i samples", nrow(colData(holdout)))) ## ----class.source="codeblock",fig.width=8,fig.height=8, eval=TRUE------------- ## this call doesn't work in Rstudio; for now we've commented this out and saved the PSN file. psn <- suppressMessages(getPSN( brca, groupList, sims=sims, selectedFeatures=results$selectedFeatures )) ## ----------------------------------------------------------------------------- library(Rtsne) tsne <- tSNEPlotter( psn$patientSimNetwork_unpruned, colData(brca) ) ## ----eval=TRUE---------------------------------------------------------------- sessionInfo()