1 Installation

You can install the current release version of NetActivityData with:

if (!require("BiocManager", quietly = TRUE))
    install.packages("BiocManager")

BiocManager::install("NetActivityData")

You can install the development version of NetActivityData from GitHub with:

# install.packages("devtools")
devtools::install_github("yocra3/NetActivityData")
library(NetActivityData)

2 Introduction

This package contains pre-trained models for use with NetActivity. The package currently contains two models, both based on GO Biological Processes and KEGG pathways. One model was trained using GTEx data (gtex_gokegg) and the other using TCGA data (tcga_gokegg). Each model consists of two objects: the weights matrix and the gene set annotation.

3 GTEx model

The GTEx model has the weights coded in gtex_gokegg, a matrix containing 1,518 gene sets and 8,758 genes (using ENSEMBL ids):

data(gtex_gokegg)
gtex_gokegg[1:5, c(1:4, 554)]
##            ENSG00000000003 ENSG00000000419 ENSG00000000938 ENSG00000000971
## GO:0006734               0               0               0               0
## GO:0008340               0               0               0               0
## GO:0014854               0               0               0               0
## GO:0019081               0               0               0               0
## GO:0019835               0               0               0               0
##            ENSG00000067057
## GO:0006734      0.02098886
## GO:0008340      0.00000000
## GO:0014854      0.00000000
## GO:0019081      0.00000000
## GO:0019835      0.00000000

The annotation is encoded in the gtex_gokegg_annot object, a data.frame containing the gene set full names and the genes’ weights:

data(gtex_gokegg_annot)
head(gtex_gokegg_annot)
##                                                                                     Term
## GO:0000012                                                    single strand break repair
## GO:0000027                                              ribosomal large subunit assembly
## GO:0000028                                              ribosomal small subunit assembly
## GO:0000054                                         ribosomal subunit export from nucleus
## GO:0000076                                          DNA replication checkpoint signaling
## GO:0000083 regulation of transcription involved in G1/S transition of mitotic cell cycle
##               GeneSet
## GO:0000012 GO:0000012
## GO:0000027 GO:0000027
## GO:0000028 GO:0000028
## GO:0000054 GO:0000054
## GO:0000076 GO:0000076
## GO:0000083 GO:0000083
##                                                                                                                                                                                                                                                                                                                                                          Weights
## GO:0000012                                                                                                                                                                                        0.035567194, 0.061123546, 0.024967987, -0.004071241, 0.494375467, -0.046093885, 0.072732233, -0.025384221, 0.109660730, -0.044479407, 0.091808565, 0.088382937
## GO:0000027 0.043507185, -0.123229168, -0.023638910, 0.072557405, -0.101902224, -0.038840789, -0.030902321, -0.021197300, 0.006503435, -0.154806212, -0.058891505, -0.114592746, 0.028564245, -0.066181295, 0.002416282, -0.071685642, 0.086436518, -0.037114877, -0.058456846, -0.094926730, -0.061880082, -0.094013929, 0.010518053, -0.189007401, -0.026227856
## GO:0000028                                                                             -0.0003988848, -0.0171362720, -0.0988358855, -0.0859392509, 0.0181661155, -0.0194991399, 0.0122082811, -0.0557554252, -0.0552943945, -0.0117528830, -0.1363112330, -0.0411239639, 0.0615135580, -0.0398536474, -0.1141668037, -0.0241925009, -0.0163414348, -0.0727257803
## GO:0000054                                                                                                                                                              0.138614878, 0.079119049, 0.110984951, 0.003106124, -0.059814066, -0.005902387, 0.166426450, -0.008747268, 0.176319689, 0.026850102, 0.142850861, -0.042719726, 0.135307029, 0.027363412
## GO:0000076                                                                                                       1.641723e-02, 5.333161e-06, 1.621200e-01, 7.397231e-02, 1.327412e-01, 1.085996e-01, 3.540195e-02, 2.089668e-01, 5.582866e-02, -8.135105e-03, -5.507286e-02, 3.667383e-02, 7.770374e-02, -3.061506e-02, 5.533135e-03, 6.281158e-02, 2.114120e-02
## GO:0000083                                                              -0.115745835, -0.099530697, -0.007893468, -0.033872891, 0.079099245, -0.025759768, -0.087829337, -0.039553192, 0.100601844, 0.050587658, 0.105372071, 0.112940207, 0.165294915, 0.079828963, -0.151657730, 0.067357212, 0.030260293, 0.148316681, -0.047926873, 0.116133198, 0.148266777
##                                                                                                                                                                                                                                                                                                                                                   Weights_SYMBOL
## GO:0000012                                                                                                                                                                                        0.035567194, 0.061123546, 0.024967987, -0.004071241, 0.494375467, -0.046093885, 0.072732233, -0.025384221, 0.109660730, -0.044479407, 0.091808565, 0.088382937
## GO:0000027 0.043507185, -0.123229168, -0.023638910, 0.072557405, -0.101902224, -0.038840789, -0.030902321, -0.021197300, 0.006503435, -0.154806212, -0.058891505, -0.114592746, 0.028564245, -0.066181295, 0.002416282, -0.071685642, 0.086436518, -0.037114877, -0.058456846, -0.094926730, -0.061880082, -0.094013929, 0.010518053, -0.189007401, -0.026227856
## GO:0000028                                                                             -0.0003988848, -0.0171362720, -0.0988358855, -0.0859392509, 0.0181661155, -0.0194991399, 0.0122082811, -0.0557554252, -0.0552943945, -0.0117528830, -0.1363112330, -0.0411239639, 0.0615135580, -0.0398536474, -0.1141668037, -0.0241925009, -0.0163414348, -0.0727257803
## GO:0000054                                                                                                                                                              0.138614878, 0.079119049, 0.110984951, 0.003106124, -0.059814066, -0.005902387, 0.166426450, -0.008747268, 0.176319689, 0.026850102, 0.142850861, -0.042719726, 0.135307029, 0.027363412
## GO:0000076                                                                                                       1.641723e-02, 5.333161e-06, 1.621200e-01, 7.397231e-02, 1.327412e-01, 1.085996e-01, 3.540195e-02, 2.089668e-01, 5.582866e-02, -8.135105e-03, -5.507286e-02, 3.667383e-02, 7.770374e-02, -3.061506e-02, 5.533135e-03, 6.281158e-02, 2.114120e-02
## GO:0000083                                                              -0.115745835, -0.099530697, -0.007893468, -0.033872891, 0.079099245, -0.025759768, -0.087829337, -0.039553192, 0.100601844, 0.050587658, 0.105372071, 0.112940207, 0.165294915, 0.079828963, -0.151657730, 0.067357212, 0.030260293, 0.148316681, -0.047926873, 0.116133198, 0.148266777

4 TCGA model

The TCGA model has the weights coded in tcga_gokegg, a matrix containing 1,518 gene sets and 8,758 genes (using ENSEMBL ids):

data(tcga_gokegg)
tcga_gokegg[1:5, c(1:4, 554)]
##            ENSG00000000003 ENSG00000000419 ENSG00000000938 ENSG00000000971
## GO:0006734               0               0               0               0
## GO:0008340               0               0               0               0
## GO:0014854               0               0               0               0
## GO:0019081               0               0               0               0
## GO:0019835               0               0               0               0
##            ENSG00000067057
## GO:0006734      0.07827249
## GO:0008340      0.00000000
## GO:0014854      0.00000000
## GO:0019081      0.00000000
## GO:0019835      0.00000000

The annotation is encoded in the tcga_gokegg_annot object, a data.frame containing the gene set full names and the genes’ weights:

data(tcga_gokegg_annot)
head(tcga_gokegg_annot)
##                                                                                     Term
## GO:0000012                                                    single strand break repair
## GO:0000027                                              ribosomal large subunit assembly
## GO:0000028                                              ribosomal small subunit assembly
## GO:0000054                                         ribosomal subunit export from nucleus
## GO:0000076                                          DNA replication checkpoint signaling
## GO:0000083 regulation of transcription involved in G1/S transition of mitotic cell cycle
##               GeneSet
## GO:0000012 GO:0000012
## GO:0000027 GO:0000027
## GO:0000028 GO:0000028
## GO:0000054 GO:0000054
## GO:0000076 GO:0000076
## GO:0000083 GO:0000083
##                                                                                                                                                                                                                                                                                                                                             Weights
## GO:0000012                                                                                                                                                                             0.037380856, 0.170272991, -0.105982788, 0.014313349, 0.075246379, 0.068831243, -0.001610893, 0.037867442, 0.218389213, 0.075989209, 0.182520196, 0.148878604
## GO:0000027 -0.021554671, 0.034113534, 0.087525211, 0.097704180, 0.041802302, 0.042129379, -0.011777161, 0.035727665, 0.004678536, 0.071312360, 0.005201661, 0.046542719, 0.061286055, 0.063334674, -0.053743202, 0.004582237, 0.026003314, 0.094573326, 0.022740463, 0.068600073, 0.011752754, 0.024453089, -0.008651564, 0.117635503, -0.007196402
## GO:0000028                                                                   0.0083221430, -0.1612766981, -0.0591099821, 0.0290649142, -0.1530193388, -0.0118804062, 0.0009691475, 0.0543136485, -0.0855586156, -0.1653613597, -0.1471685767, 0.0327429660, -0.0435673483, -0.0315283760, -0.0861951709, 0.0785642788, -0.0035195511, -0.0476780795
## GO:0000054                                                                                                                                            -0.096075319, 0.053375252, -0.091739178, -0.037047327, 0.097270645, 0.126659289, -0.163486317, -0.106171839, -0.155925184, -0.003214519, 0.017335515, -0.162202299, -0.241927966, 0.098997459
## GO:0000076                                                                                                 -0.042798948, -0.066028319, -0.114868999, -0.074630216, -0.177659094, -0.092202038, -0.120734461, -0.104675233, -0.036104515, 0.011872039, 0.064122319, -0.101170361, -0.051496901, 0.004985386, 0.009211170, -0.110245273, -0.006803449
## GO:0000083                                                  0.048898503, 0.099151552, -0.018865099, -0.083110079, 0.113711558, -0.010647335, -0.077533476, 0.012446340, 0.054787982, 0.004723188, -0.070184685, 0.126753464, 0.136158481, 0.032541510, -0.018822307, 0.031471323, -0.099007942, 0.055620976, 0.229699358, 0.009097925, -0.041929591
##                                                                                                                                                                                                                                                                                                                                      Weights_SYMBOL
## GO:0000012                                                                                                                                                                             0.037380856, 0.170272991, -0.105982788, 0.014313349, 0.075246379, 0.068831243, -0.001610893, 0.037867442, 0.218389213, 0.075989209, 0.182520196, 0.148878604
## GO:0000027 -0.021554671, 0.034113534, 0.087525211, 0.097704180, 0.041802302, 0.042129379, -0.011777161, 0.035727665, 0.004678536, 0.071312360, 0.005201661, 0.046542719, 0.061286055, 0.063334674, -0.053743202, 0.004582237, 0.026003314, 0.094573326, 0.022740463, 0.068600073, 0.011752754, 0.024453089, -0.008651564, 0.117635503, -0.007196402
## GO:0000028                                                                   0.0083221430, -0.1612766981, -0.0591099821, 0.0290649142, -0.1530193388, -0.0118804062, 0.0009691475, 0.0543136485, -0.0855586156, -0.1653613597, -0.1471685767, 0.0327429660, -0.0435673483, -0.0315283760, -0.0861951709, 0.0785642788, -0.0035195511, -0.0476780795
## GO:0000054                                                                                                                                            -0.096075319, 0.053375252, -0.091739178, -0.037047327, 0.097270645, 0.126659289, -0.163486317, -0.106171839, -0.155925184, -0.003214519, 0.017335515, -0.162202299, -0.241927966, 0.098997459
## GO:0000076                                                                                                 -0.042798948, -0.066028319, -0.114868999, -0.074630216, -0.177659094, -0.092202038, -0.120734461, -0.104675233, -0.036104515, 0.011872039, 0.064122319, -0.101170361, -0.051496901, 0.004985386, 0.009211170, -0.110245273, -0.006803449
## GO:0000083                                                  0.048898503, 0.099151552, -0.018865099, -0.083110079, 0.113711558, -0.010647335, -0.077533476, 0.012446340, 0.054787982, 0.004723188, -0.070184685, 0.126753464, 0.136158481, 0.032541510, -0.018822307, 0.031471323, -0.099007942, 0.055620976, 0.229699358, 0.009097925, -0.041929591

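The Weights and Weights_SYMBOL columns are lists in which each element is a named numeric vector of gene weights, with names given as ENSEMBL ids in Weights and as gene symbols in Weights_SYMBOL: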

tcga_gokegg_annot$Weights[[1]]
## ENSG00000042088 ENSG00000049167 ENSG00000073050 ENSG00000096717 ENSG00000118245 
##     0.037380856     0.170272991    -0.105982788     0.014313349     0.075246379 
## ENSG00000132604 ENSG00000137074 ENSG00000143799 ENSG00000169621 ENSG00000174405 
##     0.068831243    -0.001610893     0.037867442     0.218389213     0.075989209 
## ENSG00000225830 ENSG00000254469 
##     0.182520196     0.148878604
tcga_gokegg_annot$Weights_SYMBOL[[1]]
##         TDP1        ERCC8        XRCC1        SIRT1         TNP1        TERF2 
##  0.037380856  0.170272991 -0.105982788  0.014313349  0.075246379  0.068831243 
##         APTX        PARP1         APLF         LIG4        ERCC6       XNDC1N 
## -0.001610893  0.037867442  0.218389213  0.075989209  0.182520196  0.148878604
sessionInfo()
## R version 4.4.0 beta (2024-04-15 r86425)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
## 
## Matrix products: default
## BLAS:   /home/biocbuild/bbs-3.19-bioc/R/lib/libRblas.so 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB              LC_COLLATE=C              
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: America/New_York
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] NetActivityData_1.6.0 BiocStyle_2.32.0     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.35       R6_2.5.1            bookdown_0.39      
##  [4] fastmap_1.1.1       xfun_0.43           cachem_1.0.8       
##  [7] knitr_1.46          htmltools_0.5.8.1   rmarkdown_2.26     
## [10] lifecycle_1.0.4     cli_3.6.2           sass_0.4.9         
## [13] jquerylib_0.1.4     compiler_4.4.0      tools_4.4.0        
## [16] evaluate_0.23       bslib_0.7.0         yaml_2.3.8         
## [19] BiocManager_1.30.22 jsonlite_1.8.8      rlang_1.1.3