## R code extracted from the seventyGeneData vignette, one section per chunk.
## Each "## ----label, options----" line is the header of the chunk it precedes.
## Chunks marked eval=FALSE are kept commented out (prefix "# ") so that
## sourcing this file only sets the knitr options, attaches the package and
## prints the session information. The commented code is the reference recipe
## used to download the raw files and assemble the two ExpressionSet objects.

## ----rmdsetup, include = FALSE------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup, eval=TRUE, echo=TRUE, cache=FALSE---------------------------------
library(seventyGeneData)

## ----downloadVantVeer, eval=FALSE, echo=TRUE, cache=FALSE---------------------
# ### Create a working directory
# dir.create("../extdata/vantVeer", showWarnings = FALSE, recursive = TRUE)
# ### Create the url list for all supplementary data on the Nature Website
# nkiUrl <- "http://bioinformatics.nki.nl/data/van-t-Veer_Nature_2002/"
# natureUrl <- "http://www.nature.com/nature/journal/v415/n6871/extref/"
# urlList <- c(
#   paste(nkiUrl,
#     sep = "",
#     c(
#       "ArrayData_greater_than_5yr.zip",
#       "ArrayData_less_than_5yr.zip", "ArrayData_19samples.zip",
#       "ArrayData_BRCA1.zip", "ArrayNomenclature_contig_accession.xls",
#       "ArrayNomenclature_methods.doc", "ProbeSeq.xls",
#       "README-Nature_I.doc", "codeboek_Rosetta.doc"
#     )
#   ),
#   paste(natureUrl,
#     sep = "",
#     c(
#       "415530a-s7.doc", "415530a-s8.xls",
#       "415530a-s9.xls", "415530a-s10.xls", "415530a-s11.xls"
#     )
#   )
# )
# ### Download all files from Nature and NKI
# ### (the destination file name is the last path component of the url;
# ### mode = "wb" because .zip/.xls/.doc are binary files and a text-mode
# ### transfer corrupts them on Windows)
# lapply(urlList, function(x) {
#   download.file(x,
#     destfile = paste("../extdata/vantVeer/", gsub(".+/", "", x), sep = ""),
#     quiet = FALSE, mode = "wb", cacheOK = TRUE
#   )
# })

## ----downloadVanDeVijver, eval=FALSE, echo=TRUE, cache=FALSE------------------
# ### Create a working directory
# dir.create("../extdata/vanDeVijver", showWarnings = FALSE, recursive = TRUE)
# ### Create the url list for all supplementary data on the NKI Website
# nkiUrl <- "http://bioinformatics.nki.nl/data/"
# urlList <- paste(nkiUrl, sep = "", c("nejm_table1.zip", "ZipFiles295Samples.zip"))
# ### Download all files from NKI (binary transfer, see mode = "wb")
# lapply(urlList, function(x) {
#   download.file(x,
#     destfile = paste("../extdata/vanDeVijver/", gsub(".+/", "", x), sep = ""),
#     quiet = FALSE, mode = "wb", cacheOK = TRUE
#   )
# })

## ----getPackagesBioc, eval=FALSE, echo=TRUE, cache=FALSE----------------------
# ### Get the list of installed packages
# installedPckgs <- installed.packages()[, "Package"]
# ### Define the list of desired libraries
# pckgListBIOC <- c("Biobase", "limma", "breastCancerNKI", "readxl")
# ### Use the BiocManager package from Bioconductor
# if (!requireNamespace("BiocManager", quietly = TRUE)) {
#   install.packages("BiocManager")
# }
# ### Load the packages, install them from Bioconductor if needed
# for (pckg in pckgListBIOC) {
#   if (!pckg %in% installedPckgs) BiocManager::install(pckg)
#   require(pckg, character.only = TRUE)
# }

## ----assembleAnnotation, eval=FALSE, echo=TRUE, cache=FALSE-------------------
# ### Load the library with annotation
# library(Biobase)
# library(breastCancerNKI)
# ### Load the dataset
# data(nki)
# ### Check dataset classes and attributes
# class(nki)
# dim(nki)
# ### Check featureData
# str(featureData(nki))
# nkiAnn <- featureData(nki)
# ### Turn all annotation information into character
# nkiAnn@data <- as.data.frame(apply(nkiAnn@data, 2, as.character),
#   stringsAsFactors = FALSE
# )

## ----assembleAnnotation2, eval=FALSE, echo=TRUE, cache=FALSE------------------
# ### Load the library
# library(readxl)
# ### Read GBACC information for van't Veer dataset
# myFile <- system.file("extdata/vantVeer", "ArrayNomenclature_contig_accession.xls",
#   package = "seventyGeneData"
# )
# featAcc <- read_xls(myFile)
# ### Read seq information for van't Veer dataset
# myFile <- system.file("extdata/vantVeer", "ProbeSeq.xls",
#   package = "seventyGeneData"
# )
# featSeq <- read_xls(myFile)
# ### Read 70-genes signature information for van't Veer dataset
# myFile <- system.file("extdata/vantVeer", "415530a-s9.xls",
#   package = "seventyGeneData"
# )
# gns231 <- read_xls(myFile)
# ### Remove special characters in the column headers,
# ### which are due to white spaces present in the Excel files
# colnames(gns231) <- gsub("\\s|#", "", colnames(gns231))
# ### Remove GO annotation
# gns231 <- gns231[, -grep("sp_xref_keyword_list", colnames(gns231))]
# ### Reorder the genes in decreasing order by absolute correlation
# gns231 <- gns231[order(abs(gns231$correlation), decreasing = TRUE), ]
# ### Select the feature identifiers corresponding to the top 231 and 70 genes
# ### (every row of this table is flagged genes231; the first 70 rows after
# ### the reordering above are flagged genes70)
# gns231$genes231 <- TRUE
# gns231$genes70 <- gns231$accession %in% gns231$accession[1:70]
# ### Merge all information (including 70-gene signature information)
# ### with the annotation obtained from the breastCancerNKI package
# ### (each merge joins on the first column of both tables)
# newAnn <- nkiAnn@data
# newAnn <- merge(newAnn, featAcc, by.x = 1, by.y = 1, all = TRUE, sort = FALSE)
# newAnn <- merge(newAnn, featSeq, by.x = 1, by.y = 1, all = TRUE, sort = FALSE)
# newAnn <- merge(newAnn, gns231, by.x = 1, by.y = 1, all = TRUE, sort = FALSE)

## ----assembleAnnotation3, eval=FALSE, echo=TRUE, cache=FALSE------------------
# ### Check the structure of the new annotation data.frame
# newAnn <- newAnn[order(newAnn[, 1]), ]
# str(newAnn)

## ----assembleVantVeer, eval=FALSE, echo=TRUE, cache=FALSE---------------------
# ### Load the library
# library(Biobase)
# library(readxl)
# ### Check presence of downloaded files
# filesVtVloc <- system.file("extdata/vantVeer", package = "seventyGeneData")
# dir(filesVtVloc)
# ### Create list of files to be read in
# filesVtV <- dir(filesVtVloc, full.names = TRUE, pattern = "^ArrayData")
# filesVtV

## ----assembleVantVeer2, eval=FALSE, echo=TRUE, cache=FALSE--------------------
# myFile <- system.file("extdata/vantVeer", "415530a-s8.xls",
#   package = "seventyGeneData"
# )
# ### Read phenotypic information
# phenoVtV <- as.data.frame(read_xls(myFile))
# ### Show Phenotypic information
# str(phenoVtV)

## ----assembleVantVeer3, eval=FALSE, echo=TRUE, cache=FALSE--------------------
# ### Remove the special characters in the column headers
# ### due to white spaces present in the Excel file
# colnames(phenoVtV) <- gsub("\\s|#", "", colnames(phenoVtV))
# #### Remove columns that do not contain useful information
# #### (columns holding a single distinct value)
# phenoVtV <- phenoVtV[, apply(phenoVtV, 2, function(x) length(unique(x)) > 1)]
# phenoVtV$SampleName <- paste("Sample", phenoVtV$Sample)
# rownames(phenoVtV) <- phenoVtV$SampleName
# ### Read sample names from the first line of the expression data tables
# samplesVtV <- lapply(filesVtV, read.table,
#   nrows = 1, header = FALSE, sep = "\t",
#   stringsAsFactors = FALSE, fill = TRUE, strip.white = TRUE
# )
# ### Format the samples strings
# ### (keep the fields starting with "Sample"; headerDesc keeps the full
# ### strings, samplesVtV keeps only the part before the first comma)
# samplesVtV <- lapply(samplesVtV, function(x) x[grep("^Sample", x)])
# headerDesc <- samplesVtV
# samplesVtV <- lapply(samplesVtV, function(x) gsub(",.+", "", x))

## ----assembleVantVeer4, eval=FALSE, echo=TRUE, cache=FALSE--------------------
# ### Check sample labels obtained from expression data files
# str(samplesVtV)
# ### Combine the labels in one unique vector
# allSamplesVtV <- do.call("c", samplesVtV)
# ### Compare the order of the samples between the expression data
# ### and phenotypic information data.frames
# if (all(rownames(phenoVtV) %in% allSamplesVtV)) {
#   print("All sample names match phenoData")
#   if (all(rownames(phenoVtV) == allSamplesVtV)) {
#     print("All sample names match phenoData")
#   } else {
#     print("Sample names from tables and phenoData need reordering")
#     phenoVtV <- phenoVtV[order(phenoVtV$SampleName), ]
#   }
# } else {
#   print("Sample names DO NOT match phenoData")
# }

## ----assembleVantVeer5, eval=FALSE, echo=TRUE, cache=FALSE--------------------
# ### Read expression data from the 4 converted TAB-delimited text files
# dataVtV <- lapply(filesVtV, read.table,
#   skip = 1, sep = "\t", quote = "",
#   header = TRUE, row.names = NULL,
#   stringsAsFactors = FALSE, fill = FALSE, strip.white = FALSE
# )
# sapply(dataVtV, dim)
# ### Extract annotation: note that column headers are slightly different
# sapply(dataVtV, function(x) head(colnames(x)))
# sapply(dataVtV, function(x) tail(colnames(x)))
# ### Extract the associated annotation
# annVtV <- lapply(dataVtV, function(x) x[, c("Systematic.name", "Gene.name")])
# ### Turn empty strings into NA
# annVtV <- lapply(annVtV, function(x) {
#   x[x == ""] <- NA
#   x
# })
# annVtV <- do.call("cbind", annVtV)

## ----assembleVantVeer6, eval=FALSE, echo=TRUE, cache=FALSE--------------------
# ### Check annotation order in all data files
# ### (columns 1, 3, 5, 7 are the "Systematic.name" column of each file;
# ### when every row agrees across files only the first pair is kept)
# if (all(apply(annVtV[, seq(1, 8, by = 2)], 1, function(x) length(unique(x)) == 1))) {
#   print("OK")
#   annVtV <- annVtV[, 1:2]
# } else {
#   print("Check annotation")
# }

## ----extractColumns, eval=FALSE, echo=TRUE, cache=FALSE-----------------------
# ### Define the function
# ### x: one expression data table; pattern: regular expression selecting the
# ### columns to keep by name; ann: values used as row names.
# ### The selected columns are returned with rows sorted by row name; the last
# ### statement is an assignment, so the value is returned invisibly.
# extractColumns <- function(x, pattern, ann) {
#   sel <- grep(pattern, colnames(x), value = TRUE)
#   x <- x[, sel]
#   rownames(x) <- ann
#   x <- x[order(rownames(x)), ]
# }

## ----assembleVantVeer7, eval=FALSE, echo=TRUE, cache=FALSE--------------------
# ### Extract log ratio data from all the spreadsheets
# logRat <- lapply(dataVtV, extractColumns, pattern = "Log10\\.ratio", ann = annVtV[, 1])
# logRat <- do.call("cbind", logRat)
# ### Assign colnames and reorder the columns
# colnames(logRat) <- allSamplesVtV
# logRat <- logRat[, order(colnames(logRat))]

## ----assembleVantVeer8, eval=FALSE, echo=TRUE, cache=FALSE--------------------
# ### Check order
# all(phenoVtV$SampleName == colnames(logRat))

## ----assembleVantVeer9, eval=FALSE, echo=TRUE, cache=FALSE--------------------
# ### Extract p-values from all the spreadsheets
# pVal <- lapply(dataVtV, extractColumns, pattern = "value", ann = annVtV[, 1])
# pVal <- do.call("cbind", pVal)
# ### Assign colnames and reorder the columns
# colnames(pVal) <- allSamplesVtV
# pVal <- pVal[, order(colnames(pVal))]

## ----assembleVantVeer10, eval=FALSE, echo=TRUE, cache=FALSE-------------------
# ### Check order
# all(phenoVtV$SampleName == colnames(pVal))

## ----assembleVantVeer11, eval=FALSE, echo=TRUE, cache=FALSE-------------------
# ### Extract expression intensity from all the spreadsheets
# intensity <- lapply(dataVtV, extractColumns, pattern = "Intensity", ann = annVtV[, 1])
# intensity <- do.call("cbind", intensity)
# ### Assign colnames and reorder the columns
# colnames(intensity) <- allSamplesVtV
# intensity <- intensity[, order(colnames(intensity))]

## ----assembleVantVeer12, eval=FALSE, echo=TRUE, cache=FALSE-------------------
# ### Check order
# all(phenoVtV$SampleName == colnames(intensity))

## ----assembleVantVeer13, eval=FALSE, echo=TRUE, cache=FALSE-------------------
# ### Merge annotation objects and check order
# annVtV <- merge(annVtV, newAnn, by = 1, all = TRUE, sort = TRUE)
# rownames(annVtV) <- annVtV[, 1]
# all(rownames(annVtV) == rownames(logRat))
# all(rownames(annVtV) == rownames(pVal))
# all(rownames(annVtV) == rownames(intensity))
# ### Create the new assayData
# myAssayData <- assayDataNew(exprs = logRat, pValue = pVal, intensity = intensity)
# ### Create the new phenoData
# myPhenoData <- new("AnnotatedDataFrame", phenoVtV)
# ### Create the new featureData
# myFeatureData <- new("AnnotatedDataFrame", annVtV)
# ### Create the new experimentData
# ### (the abstract is a single string literal spanning several lines)
# myExperimentData <- new("MIAME",
#   name = "Marc J Van De Vijver, Hongyue Dai, and Laura J van't Veer",
#   lab = "The Netherland Cancer Institute, Amsterdam, The Netherlands",
#   contact = "Luigi Marchionni ",
#   title = "Gene expression profiling predicts clinical outcome of breast cancer",
#   abstract = "Breast cancer patients with the same stage of disease can have markedly different treatment responses and overall outcome.
# The strongest predictors for metastases (for example, lymph node status and histological grade) fail to classify accurately breast tumours according to their clinical behaviour.
# Chemotherapy or hormonal therapy reduces the risk of distant metastases by approximately one-third; however, 70-80% of patients receiving this treatment would have survived without it.
# None of the signatures of breast cancer gene expression reported to date allow for patient-tailored therapy strategies.
# Here we used DNA microarray analysis on primary breast tumours of 117 young patients, and applied supervised classification to identify a gene expression signature strongly predictive of a short interval to distant metastases (`poor prognosis' signature) in patients without tumour cells in local lymph nodes at diagnosis (lymph node negative).
# In addition, we established a signature that identifies tumours of BRCA1 carriers. The poor prognosis signature consists of genes regulating cell cycle, invasion, metastasis and angiogenesis.
# This gene expression profile will outperform all currently used clinical parameters in predicting disease outcome. Our findings provide a strategy to select patients who would benefit from adjuvant therapy.",
#   url = "http://www.ncbi.nlm.nih.gov/pubmed/?term=11823860",
#   pubMedIds = "11823860"
# )
# ### Create the expression set
# vantVeer <- new("ExpressionSet",
#   assayData = myAssayData,
#   phenoData = myPhenoData,
#   featureData = myFeatureData,
#   experimentData = myExperimentData
# )

## ----assembelVanDeVijver, eval=FALSE, echo=TRUE, cache=FALSE------------------
# ##################################################
# ### Load the library
# library(Biobase)
# library(readxl)
# ##################################################
# ### Check presence of downloaded files
# dir("../inst/extdata/vanDeVijver")

## ----assembelVanDeVijve1, eval=FALSE, echo=TRUE, cache=FALSE------------------
# ### Check presence of downloaded files
# filesVdVloc <- system.file("extdata/vanDeVijver", package = "seventyGeneData")
# dir(filesVdVloc)
# ### Create list of files to be unzipped and read in
# filesVdVzip <- dir(filesVdVloc, full.names = TRUE)
# filesVdVzip
# ### Create output directory
# myTmpDir <- paste(filesVdVloc, "/tmp", sep = "")
# ### Decompress expression
# unzip(filesVdVzip[1], exdir = myTmpDir)
# ### Decompress phenoData
# unzip(filesVdVzip[2], exdir = myTmpDir)
# ### List of files in "ZipFiles295Samples.zip" containing expression
# filesVdV <- dir(myTmpDir, full.names = TRUE, pattern = "NKI")
# ### Show file list content
# filesVdV

## ----assembelVanDeVijver2, eval=FALSE, echo=TRUE, cache=FALSE-----------------
# ### Read phenotypic information
# myFile <- dir(myTmpDir, full.names = TRUE, pattern = "Table1_ClinicalData_Table.xls")
# phenoVdV <- as.data.frame(read_xls(myFile, skip = 3))
# #### Remove columns that do not contain useful information
# #### (columns holding a single distinct value)
# phenoVdV <- phenoVdV[, apply(phenoVdV, 2, function(x) length(unique(x)) > 1)]
# phenoVdV$SampleName <- paste("Sample", phenoVdV$SampleID)
# rownames(phenoVdV) <- phenoVdV$SampleName
# ### Read sample names from the expression data spreadsheets
# ### (first line of each file, empty fields dropped)
# samplesVdV <- lapply(filesVdV, scan, what = "character", nlines = 1, sep = "\t", strip.white = FALSE)
# samplesVdV <- lapply(samplesVdV, function(x) x[x != ""])
# allSamplesVdV <- do.call("c", samplesVdV)
# ### Read all data contained in the expression data spreadsheets
# dataVdV <- lapply(filesVdV, read.table,
#   header = TRUE, skip = 1, sep = "\t", quote = "",
#   stringsAsFactors = FALSE, fill = TRUE, strip.white = TRUE
# )
# ### Extract feature annotation
# annVdV <- lapply(dataVdV, function(x) x[, c("Substance", "Gene")])
# ### Turn empty strings into NA
# annVdV <- lapply(annVdV, function(x) {
#   x[x == ""] <- NA
#   x
# })
# annVdV <- do.call("cbind", annVdV)

## ----assembelVanDeVijver2a, eval=FALSE, echo=TRUE, cache=FALSE----------------
# ### Check annotation order in all data files
# ### (columns 1, 3, ..., 11 are the "Substance" column of each file;
# ### when every row agrees across files only the first pair is kept)
# if (all(apply(annVdV[, seq(1, 12, by = 2)], 1, function(x) length(unique(x)) == 1))) {
#   print("OK")
#   annVdV <- annVdV[, 1:2]
# } else {
#   print("Check annotation")
# }

## ----extractColumns2, eval=FALSE, echo=TRUE, cache=FALSE----------------------
# ### Define the function
# ### Same as the earlier extractColumns, except that "Log.Ratio.Error" column
# ### names are first shortened to "Error", so that the pattern "Log\\.Ratio"
# ### no longer matches the error columns.
# ### NOTE: the third parameter is named annVdV; the calls below pass
# ### `ann = ...`, which reaches it through partial argument matching.
# extractColumns <- function(x, pattern, annVdV) {
#   colnames(x) <- gsub("Log\\.Ratio\\.Error", "Error", colnames(x))
#   sel <- grep(pattern, colnames(x), value = TRUE)
#   x <- x[, sel]
#   rownames(x) <- annVdV
#   x <- x[order(rownames(x)), ]
# }

## ----assembelVanDeVijver3, eval=FALSE, echo=TRUE, cache=FALSE-----------------
# ### Extract and assemble the log ratio values
# logRat <- lapply(dataVdV, extractColumns, pattern = "Log\\.Ratio", ann = annVdV[, 1])
# logRat <- do.call("cbind", logRat)
# ### Set the column names
# colnames(logRat) <- allSamplesVdV

## ----assembelVanDeVijver4, eval=FALSE, echo=TRUE, cache=FALSE-----------------
# ### Check order
# all(phenoVdV$SampleName == colnames(logRat))

## ----assembelVanDeVijver5, eval=FALSE, echo=TRUE, cache=FALSE-----------------
# ### Extract log ratio error
# logRatError <- lapply(dataVdV, extractColumns, pattern = "Error", ann = annVdV[, 1])
# logRatError <- do.call("cbind", logRatError)
# ### Set the column names
# colnames(logRatError) <- allSamplesVdV

## ----assembelVanDeVijver6, eval=FALSE, echo=TRUE, cache=FALSE-----------------
# ### Check order
# all(phenoVdV$SampleName == colnames(logRatError))

## ----assembelVanDeVijver7, eval=FALSE, echo=TRUE, cache=FALSE-----------------
# ### Extract P-value
# pVal <- lapply(dataVdV, extractColumns, pattern = "alue", ann = annVdV[, 1])
# pVal <- do.call("cbind", pVal)
# ### Set the column names
# colnames(pVal) <- allSamplesVdV

## ----assembelVanDeVijver8, eval=FALSE, echo=TRUE, cache=FALSE-----------------
# ### Check order
# all(phenoVdV$SampleName == colnames(pVal))

## ----assembelVanDeVijver9, eval=FALSE, echo=TRUE, cache=FALSE-----------------
# ### Extract Intensity
# intensity <- lapply(dataVdV, extractColumns, pattern = "Intensity", ann = annVdV[, 1])
# intensity <- do.call("cbind", intensity)
# ### Set the column names
# colnames(intensity) <- allSamplesVdV

## ----assembelVanDeVijver10, eval=FALSE, echo=TRUE, cache=FALSE----------------
# ### Check order
# all(phenoVdV$SampleName == colnames(intensity))

## ----assembelVanDeVijver11, eval=FALSE, echo=TRUE, cache=FALSE----------------
# ### Merge and check order
# annVdV <- merge(annVdV, newAnn, by = 1, all = TRUE, sort = TRUE)
# rownames(annVdV) <- annVdV[, 1]
# all(rownames(annVdV) == rownames(logRat))
# all(rownames(annVdV) == rownames(logRatError))
# all(rownames(annVdV) == rownames(pVal))
# all(rownames(annVdV) == rownames(intensity))
# ### Create the new assayData
# myAssayData <- assayDataNew(
#   exprs = logRat, exprsError = logRatError,
#   pValue = pVal, intensity = intensity
# )
# ### Create the new phenoData
# myPhenoData <- new("AnnotatedDataFrame", phenoVdV)
# ### Create the new featureData
# myFeatureData <- new("AnnotatedDataFrame", annVdV)
# ### Create the new experimentData
# ### (the abstract is a single string literal spanning several lines)
# myExperimentData <- new("MIAME",
#   name = "Marc J Van De Vijver, Yudong D He, and Laura J van't Veer",
#   lab = "The Netherland Cancer Institute, Amsterdam, The Netherlands",
#   contact = "Luigi Marchionni ",
#   title = "A gene-expresion signature as a predictor of survival in breast cancer",
#   abstract = "Background: A more accurate means of prognostication in breast cancer will improve the selection of patients for adjuvant systemic therapy.
# Methods: Using microarray analysis to evaluate our previously established 70-gene prognosis profile, we classified a series of 295 consecutive patients with primary breast carcinomas as having a gene expression signature associated with either a poor prognosis or a good prognosis.
# All patients had stage I or II breast cancer and were younger than 53 years old; 151 had lymph-node-negative disease, and 144 had lymph-node-positive disease. We evaluated the predictive power of the prognosis profile using univariable and multivariable statistical analyses.
# Results: Among the 295 patients, 180 had a poor-prognosis signature and 115 had a good-prognosis signature, and the mean (+/-SE) overall 10-year survival rates were 54.6+/-4.4 percent and 94.5+/-2.6 percent, respectively.
# At 10 years, the probability of remaining free of distant metastases was 50.6+/-4.5 percent in the group with a poor-prognosis signature and 85.2+/-4.3 percent in the group with a good-prognosis signature.
# The estimated hazard ratio for distant metastases in the group with a poor-prognosis signature, as compared with the group with the good-prognosis signature, was 5.1 (95 percent confidence interval, 2.9 to 9.0; P<0.001).
# This ratio remained significant when the groups were analyzed according to lymph-node status. Multivariable Cox regression analysis showed that the prognosis profile was a strong independent factor in predicting disease outcome.
# Conclusions: The gene-expression profile we studied is a more powerful predictor of the outcome of disease in young patients with breast cancer than standard systems based on clinical and histologic criteria. (N Engl J Med 2002;347:1999-2009.)",
#   url = "http://www.ncbi.nlm.nih.gov/pubmed/?term=12490681",
#   pubMedIds = "12490681"
# )
# ### Create the expression set
# vanDeVijver <- new("ExpressionSet",
#   assayData = myAssayData,
#   phenoData = myPhenoData,
#   featureData = myFeatureData,
#   experimentData = myExperimentData
# )
# ### Remove temporary folder (its files first, then the folder itself)
# file.remove(dir(myTmpDir, full.names = TRUE))
# file.remove(myTmpDir)

## ----addSetInfo, eval=FALSE, echo=TRUE, cache=FALSE---------------------------
# ### Define the data set type from file of origin
# ### (file name with the leading path/"ArrayData_" and the extension removed)
# type <- gsub("..txt", "", gsub(".+ArrayData_", "", filesVtV))
# dataSetType <- mapply(x = samplesVtV, y = type, FUN = function(x, y) {
#   rep(y, length(x))
# })
# ### Combine with sample information
# dataSetType <- do.call("c", dataSetType)
# names(dataSetType) <- allSamplesVtV
# ### Reorder
# dataSetType <- dataSetType[order(names(dataSetType))]

## ----addSetInfo1, eval=FALSE, echo=TRUE, cache=FALSE--------------------------
# ### Add the information to pData(vantVeer)
# if (all(rownames(pData(vantVeer)) == names(dataSetType))) {
#   pData(vantVeer)$DataSetType <- dataSetType
#   print("Adding information about data set type to pData")
# } else {
#   print("Check order pData and data set type information")
# }

## ----ttmVentVeer, eval=FALSE, echo=TRUE, cache=FALSE--------------------------
# ### Process time to metastases (TTM)
# pData(vantVeer)$TTM <- pData(vantVeer)$followup.time.yr
# #### Process TTM event
# pData(vantVeer)$TTMevent <- pData(vantVeer)$metastases
# #### Create binary TTM at 5 years groups
# pData(vantVeer)$FiveYearMetastasis <- pData(vantVeer)$TTM < 5 & pData(vantVeer)$TTMevent == 1
# ### Show structure of updated phenotypes
# str(pData(vantVeer))
# ### Save the final ExpressionSet object
# dataDirLoc <- system.file("data", package = "seventyGeneData")
# save(vantVeer, file = paste(dataDirLoc, "/vantVeer.rda", sep = ""))

## ----ttmVanDeVijver, eval=FALSE, echo=TRUE, cache=FALSE-----------------------
# ### Select new cases not included in the van't Veer study
# pVDV <- pData(vanDeVijver)
# ### Rename columns
# selNames <- c("TIMEmeta", "EVENTmeta", "TIMEsurvival", "EVENTdeath", "TIMErecurrence")
# newNames <- c("TTM", "TTMevent", "OS", "OSevent", "RFS")
# colnames(pVDV)[sapply(selNames, grep, colnames(pVDV))] <- newNames
# ### Process time to metastases (TTM): where TTM is NaN use OS instead
# pVDV$TTM[is.nan(pVDV$TTM)] <- pVDV$OS[is.nan(pVDV$TTM)]
# ### Process recurrence free survival (RFS) adding RFSevent
# pVDV$RFSevent <- pVDV$RFS < pVDV$OS
# ### Create binary TTM at 5 years groups selecting:
# ### 1) the cases with metastases as first event within 5 years
# badCases <- which(
#   pVDV$TTM <= pVDV$RFS ### Met is 1st recurrence
#   & pVDV$TTMevent == 1 ### Metastases occurred
#   & pVDV$TTM < 5 ### Recurrence within 5 years
# )
# ### 2) the cases disease free for at least 5 years
# goodCases <- which(
#   pVDV$TTM > 5 ### No metastasis before 5 years
#   & pVDV$RFS > 5 ### No recurrence before 5 years
#   & pVDV$TTMevent == 0 ### Metastases did not occur
# )

## ----ttmVanDeVijver2, eval=FALSE, echo=TRUE, cache=FALSE----------------------
# ### Check if there are duplicated cases present in both prognostic groups
# all(!goodCases %in% badCases)

## ----ttmVanDeVijver3, eval=FALSE, echo=TRUE, cache=FALSE----------------------
# ### Create groups by setting all cases to NA and then identifying bad cases
# pVDV$FiveYearMetastasis <- NA
# pVDV$FiveYearMetastasis[badCases] <- TRUE
# ### And then excluding patients with a relapse before a metastasis within 5 years
# pVDV$FiveYearMetastasis[goodCases] <- FALSE
# ### Assign updated phenotypic data
# pData(vanDeVijver) <- pVDV
# ### Show structure of updated phenotypes
# str(pData(vanDeVijver))
# ### Save the final ExpressionSet object
# dataDirLoc <- system.file("data", package = "seventyGeneData")
# save(vanDeVijver, file = paste(dataDirLoc, "/vanDeVijver.rda", sep = ""))

## ----A.sessioInfo, echo=TRUE, eval=TRUE, cache=FALSE--------------------------
sessionInfo()