## ----include=FALSE------------------------------------------------------------ knitr::opts_chunk$set(echo = TRUE) options(scipen = 1, digits = 2) set.seed(1234) library(projectR) ## ----prcomp, warning=FALSE---------------------------------------------------- # data to define PCs library(ggplot2) data(p.RNAseq6l3c3t) # do PCA on RNAseq6l3c3t expression data pc.RNAseq6l3c3t<-prcomp(t(p.RNAseq6l3c3t)) pcVAR <- round(((pc.RNAseq6l3c3t$sdev)^2/sum(pc.RNAseq6l3c3t$sdev^2))*100,2) dPCA <- data.frame(cbind(pc.RNAseq6l3c3t$x,pd.RNAseq6l3c3t)) #plot pca setCOL <- scale_colour_manual(values = c("blue","black","red"), name="Condition:") setFILL <- scale_fill_manual(values = c("blue","black","red"),guide = FALSE) setPCH <- scale_shape_manual(values=c(23,22,25,25,21,24),name="Cell Line:") pPCA <- ggplot(dPCA, aes(x=PC1, y=PC2, colour=ID.cond, shape=ID.line, fill=ID.cond)) + geom_point(aes(size=days),alpha=.6)+ setCOL + setPCH + setFILL + scale_size_area(breaks = c(2,4,6), name="Day") + theme(legend.position=c(0,0), legend.justification=c(0,0), legend.direction = "horizontal", panel.background = element_rect(fill = "white",colour=NA), legend.background = element_rect(fill = "transparent",colour=NA), plot.title = element_text(vjust = 0,hjust=0,face="bold")) + labs(title = "PCA of hPSC PolyA RNAseq", x=paste("PC1 (",pcVAR[1],"% of varience)",sep=""), y=paste("PC2 (",pcVAR[2],"% of varience)",sep="")) ## ----projectR.prcomp, warning=FALSE------------------------------------------- # data to project into PCs from RNAseq6l3c3t expression data data(p.ESepiGen4c1l) library(ggplot2) PCA2ESepi <- projectR(data = p.ESepiGen4c1l$mRNA.Seq,loadings=pc.RNAseq6l3c3t, full=TRUE, dataNames=map.ESepiGen4c1l[["GeneSymbols"]]) pd.ESepiGen4c1l<-data.frame(Condition=sapply(colnames(p.ESepiGen4c1l$mRNA.Seq), function(x) unlist(strsplit(x,'_'))[1]),stringsAsFactors=FALSE) pd.ESepiGen4c1l$color<-c(rep("red",2),rep("green",3),rep("blue",2),rep("black",2)) names(pd.ESepiGen4c1l$color)<-pd.ESepiGen4c1l$Cond dPCA2ESepi<- data.frame(cbind(t(PCA2ESepi[[1]]),pd.ESepiGen4c1l)) #plot pca library(ggplot2) setEpiCOL <- scale_colour_manual(values = c("red","green","blue","black"), guide = guide_legend(title="Lineage")) pPC2ESepiGen4c1l <- ggplot(dPCA2ESepi, aes(x=PC1, y=PC2, colour=Condition)) + geom_point(size=5) + setEpiCOL + theme(legend.position=c(0,0), legend.justification=c(0,0), panel.background = element_rect(fill = "white"), legend.direction = "horizontal", plot.title = element_text(vjust = 0,hjust=0,face="bold")) + labs(title = "Encode RNAseq in target PC1 & PC2", x=paste("Projected PC1 (",round(PCA2ESepi[[2]][1],2),"% of varience)",sep=""), y=paste("Projected PC2 (",round(PCA2ESepi[[2]][2],2),"% of varience)",sep="")) ## ----fig.show='hold', fig.width=10, fig.height=5, echo=FALSE, message= FALSE---- library(gridExtra) #grid.arrange(pPCA,pPC2ESepiGen4c1l,nrow=1) ## ----------------------------------------------------------------------------- # get data AP <- get(data("AP.RNAseq6l3c3t")) #CoGAPS run data AP <- AP$Amean # heatmap of gene weights for CoGAPs patterns library(gplots) par(mar=c(1,1,1,1)) pNMF<-heatmap.2(as.matrix(AP),col=bluered, trace='none', distfun=function(c) as.dist(1-cor(t(c))) , cexCol=1,cexRow=.5,scale = "row", hclustfun=function(x) hclust(x, method="average") ) ## ----------------------------------------------------------------------------- # data to project into PCs from RNAseq6l3c3t expression data data('p.ESepiGen4c1l4') data('p.RNAseq6l3c3t') NMF2ESepi <- projectR(p.ESepiGen4c1l$mRNA.Seq,loadings=AP,full=TRUE, dataNames=map.ESepiGen4c1l[["GeneSymbols"]]) dNMF2ESepi<- data.frame(cbind(t(NMF2ESepi),pd.ESepiGen4c1l)) #plot pca library(ggplot2) setEpiCOL <- scale_colour_manual(values = c("red","green","blue","black"), guide = guide_legend(title="Lineage")) pNMF2ESepiGen4c1l <- ggplot(dNMF2ESepi, aes(x=X1, y=X2, colour=Condition)) + geom_point(size=5) + setEpiCOL + theme(legend.position=c(0,0), legend.justification=c(0,0), panel.background = element_rect(fill = "white"), legend.direction = "horizontal", plot.title = element_text(vjust = 0,hjust=0,face="bold")) labs(title = "Encode RNAseq in target PC1 & PC2", x=paste("Projected PC1 (",round(PCA2ESepi[[2]][1],2),"% of varience)",sep=""), y=paste("Projected PC2 (",round(PCA2ESepi[[2]][2],2),"% of varience)",sep="")) ## ----correlateR-exp----------------------------------------------------------- # data to data("p.RNAseq6l3c3t") # get genes correlated to T cor2T<-correlateR(genes="T", dat=p.RNAseq6l3c3t, threshtype="N", threshold=10, absR=TRUE) cor2T <- cor2T@corM ### heatmap of genes more correlated to T indx<-unlist(sapply(cor2T,rownames)) indx <- as.vector(indx) colnames(p.RNAseq6l3c3t)<-pd.RNAseq6l3c3t$sampleX library(reshape2) pm.RNAseq6l3c3t<-melt(cbind(p.RNAseq6l3c3t[indx,],indx)) library(gplots) library(ggplot2) library(viridis) pCorT<-ggplot(pm.RNAseq6l3c3t, aes(variable, indx, fill = value)) + geom_tile(colour="gray20", size=1.5, stat="identity") + scale_fill_viridis(option="B") + xlab("") + ylab("") + scale_y_discrete(limits=indx) + ggtitle("Ten genes most highly pos & neg correlated with T") + theme( panel.background = element_rect(fill="gray20"), panel.border = element_rect(fill=NA,color="gray20", size=0.5, linetype="solid"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_blank(), axis.ticks = element_blank(), axis.text = element_text(size=rel(1),hjust=1), axis.text.x = element_text(angle = 90,vjust=.5), legend.text = element_text(color="white", size=rel(1)), legend.background = element_rect(fill="gray20"), legend.position = "bottom", legend.title=element_blank() ) ## ----fig.show='hold', fig.width=10, fig.height=5, echo=FALSE------------------ pCorT ## ----------------------------------------------------------------------------- # data to project into from RNAseq6l3c3t expression data data(p.ESepiGen4c1l) cor2ESepi <- projectR(p.ESepiGen4c1l$mRNA.Seq,loadings=cor2T[[1]],full=FALSE, dataNames=map.ESepiGen4c1l$GeneSymbols) ## ----projectionDriver, message = F, out.width="100%"------ options(width = 60) library(dplyr, warn.conflicts = F) #size-normed, log expression data("microglial_counts") #size-normed, log expression data("glial_counts") #5 pattern cogaps object generated on microglial_counts data("cr_microglial") microglial_fl <- cr_microglial@featureLoadings #the features by which to weight the difference in expression pattern_to_weight <- "Pattern_1" drivers_ci <- projectionDriveR(microglial_counts, #expression matrix glial_counts, #expression matrix loadings = microglial_fl, #feature x pattern dataframe loadingsNames = NULL, pattern_name = pattern_to_weight, #column name pvalue = 1e-5, #pvalue before bonferroni correction display = T, normalize_pattern = T, #normalize feature weights mode = "CI") #confidence interval mode conf_intervals <- drivers_ci$mean_ci[drivers_ci$sig_genes$significant_shared_genes,] str(conf_intervals) drivers_pv <- projectionDriveR(microglial_counts, #expression matrix glial_counts, #expression matrix loadings = microglial_fl, #feature x pattern dataframe loadingsNames = NULL, pattern_name = pattern_to_weight, #column name pvalue = 1e-5, #pvalue before bonferroni correction display = T, normalize_pattern = T, #normalize feature weights mode = "PV") #confidence interval mode difexp <- drivers_pv$difexpgenes str(difexp) ## --------------------------------------------------------- suppressWarnings(library(cowplot)) #order in ascending order of estimates conf_intervals <- conf_intervals %>% mutate(mid = (high+low)/2) %>% arrange(mid) gene_order <- rownames(conf_intervals) #add text labels for top and bottom n genes conf_intervals$label_name <- NA_character_ n <- 2 idx <- c(1:n, (dim(conf_intervals)[1]-(n-1)):dim(conf_intervals)[1]) gene_ids <- gene_order[idx] conf_intervals$label_name[idx] <- gene_ids #the labels above can now be used as ggplot aesthetics plots_list <- plotConfidenceIntervals(conf_intervals, #mean difference in expression confidence intervals sort = F, #should genes be sorted by estimates weights = drivers_ci$normalized_weights[rownames(conf_intervals)], pattern_name = pattern_to_weight, weights_clip = 0.99, weights_vis_norm = "none") pl1 <- plots_list[["ci_estimates_plot"]] + ggrepel::geom_label_repel(aes(label = label_name), max.overlaps = 20, force = 50) pl2 <- plots_list[["weights_heatmap"]] #now plot the weighted differences weighted_conf_intervals <- drivers_ci$weighted_mean_ci[gene_order,] plots_list_weighted <- plotConfidenceIntervals(weighted_conf_intervals, sort = F, weighted = T) pl3 <- plots_list_weighted[["ci_estimates_plot"]] + xlab("Difference in weighted group means") + theme(axis.title.y = element_blank(), axis.ticks.y = element_blank(), axis.text.y = element_blank()) cowplot::plot_grid(pl1, pl2, pl3, align = "h", rel_widths = c(1,.4, 1), ncol = 3) ## ----fig.width=9, fig.height=10--------------------------- suppressWarnings(library(cowplot)) library(fgsea) library(msigdbr) #manually change FC and pvalue threshold from defaults 0.2, 1e-5 drivers_pv_mod <- pdVolcano(drivers_pv, FC = 0.5, pvalue = 1e-7) difexp_mod <- drivers_pv_mod$difexpgenes str(difexp_mod) #fgsea application #extract unweighted fgsea vector fgseavec <- drivers_pv$fgseavecs$unweightedvec #split into enrichment groups, negative estimates are enriched in the reference matrix (glial), positive are enriched in the test matrix (microglial), take abs value glial_fgsea_vec <- abs(fgseavec[which(fgseavec < 0)]) microglial_fgsea_vec <- abs(fgseavec[which(fgseavec > 0)]) #get FGSEA pathways - selecting subcategory C8 for cell types msigdbr_list = msigdbr::msigdbr(species = "Mus musculus", category = "C8") pathways = split(x = msigdbr_list$ensembl_gene, f = msigdbr_list$gs_name) #run fgsea scoreType positive, all values will be positive glial_fgsea <- fgsea::fgsea(pathways, glial_fgsea_vec, scoreType = "pos") #tidy glial_fgseaResTidy <- glial_fgsea %>% subset(padj <= 0.05 & size >= 10 & size <= 500) %>% as_tibble() %>% dplyr::arrange(desc(size)) #plot glial_EnrichmentPlot <- ggplot2::ggplot(glial_fgseaResTidy, ggplot2::aes(reorder(pathway, NES), NES)) + ggplot2::geom_point(aes(size=size, color = padj)) + coord_flip() + labs(x="Pathway", y="Normalized Enrichment Score", title="Pathway NES from GSEA") + theme_minimal() glial_EnrichmentPlot microglial_fgsea <- fgsea::fgsea(pathways, microglial_fgsea_vec, scoreType = "pos") #tidy microglial_fgseaResTidy <- microglial_fgsea %>% subset(padj <= 0.05 & size >= 10 & size <= 500) %>% as_tibble() %>% dplyr::arrange(desc(size)) microglial_EnrichmentPlot <- ggplot2::ggplot(microglial_fgseaResTidy, ggplot2::aes(reorder(pathway, NES), NES)) + ggplot2::geom_point(aes(size=size, color = padj)) + coord_flip() + labs(x="Pathway", y="Normalized Enrichment Score", title="Pathway NES from GSEA") + theme_minimal() microglial_EnrichmentPlot