MAGeCKFlute provides three different methods for pathway enrichment analysis, including hypergeometric test (HGT), over-representation test (ORT), and gene set enrichment analysis (GSE). MAGeCKFlute also includes several functions for visualizing enrichment results. This vignette introduces the functions related to pathway enrichment analysis.
knitr::opts_chunk$set(tidy=FALSE, cache=TRUE,
dev="png", message=FALSE, error=FALSE, warning=TRUE)
Citation: if you use MAGeCKFlute in published research, please cite: Binbin Wang, Mei Wang, Wubing Zhang. “Integrative analysis of pooled CRISPR genetic screens using MAGeCKFlute.” Nature Protocols (2019), doi: 10.1038/s41596-018-0113-7.
if(!"MAGeCKFlute" %in% installed.packages()) BiocManager::install("MAGeCKFlute")
if(!"ggplot2" %in% installed.packages()) BiocManager::install("ggplot2")
library(MAGeCKFlute)
library(ggplot2)
MAGeCKFlute requires a weighted gene list for enrichment analysis.
file1 = file.path(system.file("extdata", package = "MAGeCKFlute"),
"testdata/rra.gene_summary.txt")
gdata = ReadRRA(file1)
genelist = gdata$Score
names(genelist) = gdata$id
genelist = sort(genelist, decreasing = TRUE)
head(genelist)
## Jak1 Stat1 Ifngr2 Ifngr1 B2m H2-D1
## 10.7440 9.0330 5.6097 5.4719 4.3433 4.2110
# Alternative functions EnrichAnalyzer and enrich.HGT.
hgtRes1 = EnrichAnalyzer(genelist[1:200], method = "HGT",
type = "Pathway", organism = "mmu")
head(hgtRes1@result)
## ID Description
## BIOCARTA_IFNG_PATHWAY BIOCARTA_IFNG_PATHWAY IFNG PATHWAY
## REACTOME_877312 REACTOME_877312 Regulation of IFNG signaling
## BIOCARTA_IL22BP_PATHWAY BIOCARTA_IL22BP_PATHWAY IL22BP PATHWAY
## BIOCARTA_IFNA_PATHWAY BIOCARTA_IFNA_PATHWAY IFNA PATHWAY
## BIOCARTA_TID_PATHWAY BIOCARTA_TID_PATHWAY TID PATHWAY
## REACTOME_877300 REACTOME_877300 Interferon gamma signaling
## NES pvalue p.adjust GeneRatio BgRatio
## BIOCARTA_IFNG_PATHWAY 18.307949 9.401935e-11 5.048839e-08 5/107 6/9264
## REACTOME_877312 9.714240 2.440253e-05 4.081607e-03 3/107 9/9264
## BIOCARTA_IL22BP_PATHWAY 15.317503 2.440253e-05 4.081607e-03 3/107 9/9264
## BIOCARTA_IFNA_PATHWAY 13.883283 3.040303e-05 4.081607e-03 4/107 18/9264
## BIOCARTA_TID_PATHWAY 9.455281 4.054116e-05 4.354120e-03 4/107 19/9264
## REACTOME_877300 9.714240 9.109707e-05 6.444888e-03 3/107 12/9264
## geneID
## BIOCARTA_IFNG_PATHWAY 16451/16452/20846/15979/15980
## REACTOME_877312 16452/15979/15980
## BIOCARTA_IL22BP_PATHWAY 16451/16452/20846
## BIOCARTA_IFNA_PATHWAY 16451/15975/15976/20846
## BIOCARTA_TID_PATHWAY 16452/22059/15979/15980
## REACTOME_877300 16452/15979/15980
## geneName Count
## BIOCARTA_IFNG_PATHWAY Jak1/Jak2/Stat1/Ifngr1/Ifngr2 5
## REACTOME_877312 Jak2/Ifngr1/Ifngr2 3
## BIOCARTA_IL22BP_PATHWAY Jak1/Jak2/Stat1 3
## BIOCARTA_IFNA_PATHWAY Jak1/Ifnar1/Ifnar2/Stat1 4
## BIOCARTA_TID_PATHWAY Jak2/Trp53/Ifngr1/Ifngr2 4
## REACTOME_877300 Jak2/Ifngr1/Ifngr2 3
# hgtRes2 = enrich.HGT(genelist[1:200])
# head(hgtRes2@result)
The ORT and GSEA are implemented in the R package clusterProfiler (Yu et al. 2012). So please install it prior to the analysis.
if(!"clusterProfiler" %in% installed.packages()){
BiocManager::install("clusterProfiler")
}
library(clusterProfiler)
# Alternative functions EnrichAnalyzer and enrich.ORT.
ortRes1 = EnrichAnalyzer(genelist[1:200], method = "ORT",
type = "KEGG", organism = "mmu")
head(ortRes1@result)
## ID
## KEGG_mmu05140 KEGG_mmu05140
## KEGG_mmu05235 KEGG_mmu05235
## KEGG_mmu04612 KEGG_mmu04612
## KEGG_mmu05340 KEGG_mmu05340
## KEGG_mmu05212 KEGG_mmu05212
## KEGG_mmu04658 KEGG_mmu04658
## Description NES
## KEGG_mmu05140 Leishmaniasis 17.491523
## KEGG_mmu05235 PD-L1 expression and PD-1 checkpoint pathway in cancer 17.795521
## KEGG_mmu04612 Antigen processing and presentation 9.038750
## KEGG_mmu05340 Primary immunodeficiency 5.717129
## KEGG_mmu05212 Pancreatic cancer 13.069760
## KEGG_mmu04658 Th1 and Th2 cell differentiation 18.307949
## pvalue p.adjust GeneRatio BgRatio
## KEGG_mmu05140 1.530265e-05 0.001254818 7/54 70/4536
## KEGG_mmu05235 5.495082e-04 0.016338254 6/54 88/4536
## KEGG_mmu04612 6.199206e-04 0.016338254 6/54 90/4536
## KEGG_mmu05340 7.969880e-04 0.016338254 4/54 36/4536
## KEGG_mmu05212 1.927944e-03 0.031618287 5/54 76/4536
## KEGG_mmu04658 3.670334e-03 0.044654889 5/54 88/4536
## geneID
## KEGG_mmu05140 16451/20846/15980/15979/16452/17357/16412
## KEGG_mmu05235 16451/20846/15980/15979/16452/13645
## KEGG_mmu04612 12010/14964/21354/21355/19727/53970
## KEGG_mmu05340 21354/21355/19727/53970
## KEGG_mmu05212 16451/20846/12575/13645/22059
## KEGG_mmu04658 16451/20846/15980/15979/16452
## geneName Count
## KEGG_mmu05140 Jak1/Stat1/Ifngr2/Ifngr1/Jak2/Marcksl1/Itgb1 7
## KEGG_mmu05235 Jak1/Stat1/Ifngr2/Ifngr1/Jak2/Egf 6
## KEGG_mmu04612 B2m/H2-D1/Tap1/Tap2/Rfxank/Rfx5 6
## KEGG_mmu05340 Tap1/Tap2/Rfxank/Rfx5 4
## KEGG_mmu05212 Jak1/Stat1/Cdkn1a/Egf/Trp53 5
## KEGG_mmu04658 Jak1/Stat1/Ifngr2/Ifngr1/Jak2 5
# ortRes2 = enrich.ORT(genelist[genelist< -1])
# head(ortRes2@result)
library(clusterProfiler)
# Alternative functions EnrichAnalyzer and enrich.GSE.
gseRes1 = EnrichAnalyzer(genelist, method = "GSEA", type = "Pathway", organism = "mmu")
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (8.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
require(ggplot2)
df = hgtRes1@result
df$logFDR = -log10(df$p.adjust)
p = BarView(df[1:5,], "Description", 'logFDR')
p = p + labs(x = NULL) + coord_flip()
p
# Or use function barplot from enrichplot package
barplot(hgtRes1, showCategory = 5)
## top: up-regulated pathways;
## bottom: down-regulated pathways
EnrichedView(hgtRes1, top = 5, bottom = 0, mode = 1)
EnrichedView(hgtRes1, top = 5, bottom = 0, mode = 2)
dotplot(hgtRes1, showCategory = 5)