## ----intro1------------------------------------------------------------------- library("MASS") data("crabs") dim(crabs) crabs[1:4,] table(crabs$sex) ## ----figbwplot, echo=FALSE, fig.cap="\\label{fig:figbwplot} Boxplots of RW, the rear width in mm, stratified by species('B' or 'O' for blue or orange) and sex ('F' and 'M')."---- library("lattice") print(bwplot(RW~sp|sex, data=crabs)) ## ----dop---------------------------------------------------------------------- m1 = glm(sp~RW, data=crabs, family=binomial) summary(m1) ## ----domo, results='hide', fig.show="hide"------------------------------------ plot(predict(m1,type="response"), crabs$sp,) table(predict(m1,type="response")>.5, crabs$sp) m2 = update(m1, subset=(sex=="F")) table(predict(m2,type="response")>.5, crabs$sp[crabs$sex=="F"]) ## ----doml1, message=FALSE----------------------------------------------------- library(MLInterfaces) fcrabs = crabs[crabs$sex == "F", ] ml1 = MLearn( sp~RW, fcrabs,glmI.logistic(thresh=.5), c(1:30, 51:80), family=binomial) ml1 confuMat(ml1) ## ----doscra------------------------------------------------------------------- set.seed(123) sfcrabs = fcrabs[ sample(nrow(fcrabs)), ] ## ----domods------------------------------------------------------------------- sml1 = MLearn( sp~RW, sfcrabs, glmI.logistic(thresh=.5),c(1:30, 51:80),family=binomial) confuMat(sml1) smx1 = MLearn( sp~RW, sfcrabs, glmI.logistic(thresh=.5),xvalSpec("LOG", 5, function(data, clab, iternum) {which(rep(1:5, each=20) == iternum) }), family=binomial) confuMat(smx1) ## ----figdopa,fig=TRUE,width=7,height=7, fig.cap="\\label{fig:figdopa}Pairs plot of the 5 quantitative features of the crabs data.Points are colored by species."---- pairs(crabs[,-c(1:3)], col=ifelse(crabs$sp=="B", "blue", "orange")) ## ----dopc, fig.cap="\\label{fig:figdopa}Pairs plot of the crabs data in principal component coordinates"---- pc1 = prcomp( crabs[,-c(1:3)] ) pairs(pc1$x, col=ifelse(crabs$sp=="B", "blue", "orange")) ## ----figdobi,fig=TRUE, fig.cap="\\label{fig:figdopa} Biplot of the principal component analysis of the crabs data."---- biplot(pc1, choices=2:3, col=c("#80808080", "red")) ## ----checkClaim,echo=FALSE---------------------------------------------------- stopifnot(eval(formals(heatmap)$scale)[1]=="row") ## ----figdohm,fig=TRUE,height=6.5,width=6.5, fig.cap="\\label{fig:figdopa}Heatmap plot of the crabs data, including dendrograms representing hierarchical clustering of the rows and columns."---- X = data.matrix(crabs[,-c(1:3)]) heatmap(t(X), ColSideColors=ifelse(crabs$sp=="O", "orange", "blue"), col = colorRampPalette(c("blue", "white", "red"))(255)) ## ----docl--------------------------------------------------------------------- cl = hclust(dist(X)) tr = cutree(cl,2) table(tr) ## ----dos,fig=TRUE------------------------------------------------------------- library(cluster) sil = silhouette( tr, dist(X) ) plot(sil) ## ----newes-------------------------------------------------------------------- feat2 = t(data.matrix(crabs[, -c(1:3)])) pd2 =new("AnnotatedDataFrame", crabs[,1:2]) crES = new("ExpressionSet",exprs=feat2, phenoData=pd2) crES$spsex = paste(crES$sp,crES$sex, sep=":") table(crES$spsex) ## ----doper-------------------------------------------------------------------- set.seed(1234) crES = crES[ , sample(1:200, size=200, replace=FALSE)] ## ----dotr, message=FALSE------------------------------------------------------ library(rpart) tr1 = MLearn(spsex~., crES, rpartI, 1:140) tr1 confuMat(tr1) ## ----doplTree,fig=TRUE-------------------------------------------------------- plot(RObject(tr1)) text(RObject(tr1)) ## ----doccp,fig=TRUE----------------------------------------------------------- plotcp(RObject(tr1)) ## ----dorf, message=FALSE------------------------------------------------------ set.seed(124) library(randomForest) crES$spsex = factor(crES$spsex) # needed 3/2020 as fails with 'do regression?' error rf1 = MLearn(spsex~., crES, randomForestI, 1:140 ) rf1 cm = confuMat(rf1) cm ## ----dold, message=FALSE------------------------------------------------------ ld1 = MLearn(spsex~., crES, ldaI, 1:140 ) ld1 confuMat(ld1) xvld = MLearn( spsex~., crES, ldaI, xvalSpec("LOG", 5,balKfold.xvspec(5))) confuMat(xvld) ## ----message=FALSE------------------------------------------------------------ nn1 = MLearn(spsex~., crES, nnetI, 1:140, size=3, decay=.1) nn1 RObject(nn1) confuMat(nn1) ## ----doxx, message=FALSE------------------------------------------------------ xvnnBAD = MLearn( spsex~., crES, nnetI, xvalSpec("LOG", 5, function(data, clab,iternum) which( rep(1:5,each=40) == iternum ) ), size=3,decay=.1 ) xvnnGOOD = MLearn( spsex~., crES, nnetI, xvalSpec("LOG", 5,balKfold.xvspec(5) ), size=3, decay=.1 ) ## ----lktann------------------------------------------------------------------- confuMat(xvnnBAD) confuMat(xvnnGOOD) ## ----dnn, message=FALSE------------------------------------------------------- sv1 = MLearn(spsex~., crES, svmI, 1:140) sv1 RObject(sv1) confuMat(sv1) ## ----doxxs, message=FALSE----------------------------------------------------- xvsv = MLearn( spsex~., crES,svmI, xvalSpec("LOG", 5, balKfold.xvspec(5))) ## ----lktasv------------------------------------------------------------------- confuMat(xvsv) ## ----setupALL,cache=TRUE------------------------------------------------------ library("ALL") data("ALL") bALL = ALL[, substr(ALL$BT,1,1) == "B"] fus = bALL[, bALL$mol.biol %in% c("BCR/ABL", "NEG")] fus$mol.biol = factor(fus$mol.biol) fus ## ----getq--------------------------------------------------------------------- mads = apply(exprs(fus),1,mad) fusk = fus[ mads > sort(mads,decr=TRUE)[300], ] fcol =ifelse(fusk$mol.biol=="NEG", "green", "red") ## ----dohALL,fig=TRUE---------------------------------------------------------- heatmap(exprs(fusk), ColSideColors=fcol) ## ----dopcALL------------------------------------------------------------------ PCg = prcomp(t(exprs(fusk))) ## ----lkscre,fig=TRUE---------------------------------------------------------- plot(PCg) ## ----lkprALL,fig=TRUE--------------------------------------------------------- pairs(PCg$x[,1:5],col=fcol,pch=19) ## ----dobiALL,fig=TRUE--------------------------------------------------------- biplot(PCg) ## ----dld1,cache=TRUE, message=FALSE------------------------------------------- dld1 = MLearn( mol.biol~., fusk, dldaI, 1:40 ) ## ----dld2--------------------------------------------------------------------- dld1 confuMat(dld1) ## ----dld3,cache=TRUE---------------------------------------------------------- nnALL = MLearn( mol.biol~., fusk, nnetI, 1:40, size=5, decay=.01, MaxNWts=2000 ) ## ----dld4--------------------------------------------------------------------- confuMat(nnALL) ## ----dld5,cache=TRUE---------------------------------------------------------- rfALL = MLearn( mol.biol~., fusk, randomForestI, 1:40 ) ## ----dld6--------------------------------------------------------------------- rfALL confuMat(rfALL) ## ----getko, message=FALSE----------------------------------------------------- library(keggorthology) data(KOgraph) adj(KOgraph,nodes(KOgraph)[1]) EIP = getKOprobes("Environmental Information Processing") GIP = getKOprobes("Genetic Information Processing") length(intersect(EIP, GIP)) EIPi = setdiff(EIP, GIP) GIP = setdiff(GIP, EIP) EIP = EIPi Efusk = fusk[ featureNames(fusk) %in% EIP, ] Gfusk = fusk[ featureNames(fusk) %in% EIP, ] ## ----dofs, message=FALSE------------------------------------------------------ dldFS = MLearn( mol.biol~., fusk, dldaI, xvalSpec("LOG", 5, balKfold.xvspec(5), fs.absT(30) )) dldFS confuMat(dld1) confuMat(dldFS) ## ----lksess------------------------------------------------------------------- sessionInfo()