Summary

Over-representation and Gene Sets Enrichment analyses are used for functional interpretation of differentially expressed genes lists.

Start section

Install and load libraries

#install.packages("readr")
#install.packages("dplyr")
#install.packages("stringi")
#BiocManager::install("org.Hs.eg.db")
#BiocManager::install("clusterProfiler")
#BiocManager::install("ggkegg")


# load packages
library(readr)
library(dplyr)
library(stringi)
library(org.Hs.eg.db)
library(clusterProfiler)
library(ggkegg)

Clean environment

# Time stamp
Sys.time()

## [1] "2024-06-26 16:19:50 BST"

rm(list=ls())
graphics.off()

1. Import and prepare data

1.1 Import DESeq2 results

These results were generated during the previous step of the practical session.

base_folder <- "C:/Users/alexe/OneDrive - Cranfield University/External/ebi/short_reads"

deseq2_output <- file.path(base_folder,"data","deseq_gene_lists","DESeq2_FDR_0.1_any_FC_genes.txt")

# Import all genes ranked by p-values from DeSeq2 analysis
# readr::read_tsv  provides additional functionality when reading tsv files
genes_raw.df <- readr::read_tsv(file = deseq2_output)

# Explore data
class(genes_raw.df)

## [1] "spec_tbl_df" "tbl_df"      "tbl"         "data.frame"

dim(genes_raw.df)

## [1] 25030     5

head(genes_raw.df)

## # A tibble: 6 × 5
##   gene_id            gene_name baseMean log2FoldChange     padj
##   <chr>              <chr>        <dbl>          <dbl>    <dbl>
## 1 ENSG00000130294.18 KIF1A       42588.          11.1  6.83e-72
## 2 ENSG00000075340.23 ADD2         6056.           9.97 6.37e-53
## 3 ENSG00000105971.15 CAV2         9130.          -8.35 5.25e-51
## 4 ENSG00000163682.17 RPL9        30771.          -5.20 5.33e-49
## 5 ENSG00000167644.12 C19orf33     1720.          -9.05 6.38e-40
## 6 ENSG00000069188.17 SDK2         1197.           8.99 1.55e-39

tail(genes_raw.df)

## # A tibble: 6 × 5
##   gene_id            gene_name       baseMean log2FoldChange  padj
##   <chr>              <chr>              <dbl>          <dbl> <dbl>
## 1 ENSG00000151725.12 CENPU             4333.     0.000153     1.00
## 2 ENSG00000270424.1  ENSG00000270424     11.5   -0.000535     1.00
## 3 ENSG00000224186.8  PITX1-AS1          269.    -0.000167     1.00
## 4 ENSG00000143753.14 DEGS1             3148.     0.0000166    1.00
## 5 ENSG00000132763.15 MMACHC            1167.     0.0000149    1.00
## 6 ENSG00000129315.12 CCNT1             5978.    -0.000000280  1.00

# Clean-up
rm(deseq2_output)

1.2 Add Entrez ID

Entrez ID will be needed for KEGG analysis. Note that some Ensembl IDs can not be mapped to Entrez IDs. This is an expected limitation of converting between the different Gene IDs.

# Remove version number from ENSEMBL ids
genes_ensembl_no_version.df <-  genes_raw.df %>%
  mutate(gene_id = stringi::stri_replace_all(gene_id,regex = "\\.(\\d*)$", ""))

head(genes_ensembl_no_version.df)

## # A tibble: 6 × 5
##   gene_id         gene_name baseMean log2FoldChange     padj
##   <chr>           <chr>        <dbl>          <dbl>    <dbl>
## 1 ENSG00000130294 KIF1A       42588.          11.1  6.83e-72
## 2 ENSG00000075340 ADD2         6056.           9.97 6.37e-53
## 3 ENSG00000105971 CAV2         9130.          -8.35 5.25e-51
## 4 ENSG00000163682 RPL9        30771.          -5.20 5.33e-49
## 5 ENSG00000167644 C19orf33     1720.          -9.05 6.38e-40
## 6 ENSG00000069188 SDK2         1197.           8.99 1.55e-39

tail(genes_ensembl_no_version.df)

## # A tibble: 6 × 5
##   gene_id         gene_name       baseMean log2FoldChange  padj
##   <chr>           <chr>              <dbl>          <dbl> <dbl>
## 1 ENSG00000151725 CENPU             4333.     0.000153     1.00
## 2 ENSG00000270424 ENSG00000270424     11.5   -0.000535     1.00
## 3 ENSG00000224186 PITX1-AS1          269.    -0.000167     1.00
## 4 ENSG00000143753 DEGS1             3148.     0.0000166    1.00
## 5 ENSG00000132763 MMACHC            1167.     0.0000149    1.00
## 6 ENSG00000129315 CCNT1             5978.    -0.000000280  1.00

# Get vector of all ENSEMBL IDs in our data set
ensembl_id_vector <- genes_ensembl_no_version.df$gene_id

# Using org.Hs.eg.db find ENTREZ IDs corresponding to our ENSEMBL IDs
ensembl_to_entrez.df <- clusterProfiler::bitr(geneID = ensembl_id_vector,
                                              fromType = "ENSEMBL",
                                              toType = "ENTREZID",
                                              OrgDb = "org.Hs.eg.db")
# Check result
head(ensembl_to_entrez.df)

##           ENSEMBL ENTREZID
## 1 ENSG00000130294      547
## 2 ENSG00000075340      119
## 3 ENSG00000105971      858
## 4 ENSG00000163682     6133
## 5 ENSG00000167644    64073
## 6 ENSG00000069188    54549

# Rename the ENSEMBL ID column to "gene_id" 
# to make it consistent with the source data 
# (to allow for easy merging with the original dataframe)
ensembl_to_entrez.df <- ensembl_to_entrez.df %>%
  dplyr::rename("gene_id" = ENSEMBL)

# Check result
head(ensembl_to_entrez.df)

##           gene_id ENTREZID
## 1 ENSG00000130294      547
## 2 ENSG00000075340      119
## 3 ENSG00000105971      858
## 4 ENSG00000163682     6133
## 5 ENSG00000167644    64073
## 6 ENSG00000069188    54549

# Add a column with ENTREZ IDs to our dataset
genes_all.df <- genes_ensembl_no_version.df %>%
  left_join(ensembl_to_entrez.df, by = "gene_id")

# Check result
dim(genes_all.df)

## [1] 25575     6

head(genes_all.df)

## # A tibble: 6 × 6
##   gene_id         gene_name baseMean log2FoldChange     padj ENTREZID
##   <chr>           <chr>        <dbl>          <dbl>    <dbl> <chr>   
## 1 ENSG00000130294 KIF1A       42588.          11.1  6.83e-72 547     
## 2 ENSG00000075340 ADD2         6056.           9.97 6.37e-53 119     
## 3 ENSG00000105971 CAV2         9130.          -8.35 5.25e-51 858     
## 4 ENSG00000163682 RPL9        30771.          -5.20 5.33e-49 6133    
## 5 ENSG00000167644 C19orf33     1720.          -9.05 6.38e-40 64073   
## 6 ENSG00000069188 SDK2         1197.           8.99 1.55e-39 54549

# Clean-up (remove unneeded interim data)
rm(genes_raw.df, genes_ensembl_no_version.df,
   ensembl_to_entrez.df, ensembl_id_vector)

2. GSEA

2.1 Order by fold change

During DESeq analysis, we sorted our gene list by adjusted p-value. Although it could be used for GSEA, it is more common to order genes by fold change, or by a combination of fold change and p-value, for instance: lfc / padj .

In the analysis below we will order genes by fold change.

genes_all.df <- genes_all.df %>% 
  arrange(desc(log2FoldChange))

# Check result
head(genes_all.df)

## # A tibble: 6 × 6
##   gene_id         gene_name       baseMean log2FoldChange     padj ENTREZID 
##   <chr>           <chr>              <dbl>          <dbl>    <dbl> <chr>    
## 1 ENSG00000117154 IGSF21              930.           13.4 2.05e- 5 84966    
## 2 ENSG00000101180 HRH3                696.           13.0 2.14e- 6 11255    
## 3 ENSG00000149295 DRD2               2428.           12.9 4.32e- 6 1813     
## 4 ENSG00000185518 SV2B               2208.           12.7 7.36e- 7 9899     
## 5 ENSG00000286190 ENSG00000286190    1027.           12.6 6.57e-19 728392   
## 6 ENSG00000286190 ENSG00000286190    1027.           12.6 6.57e-19 124900388

tail(genes_all.df)

## # A tibble: 6 × 6
##   gene_id         gene_name baseMean log2FoldChange     padj ENTREZID 
##   <chr>           <chr>        <dbl>          <dbl>    <dbl> <chr>    
## 1 ENSG00000184995 IFNE          239.          -11.3 4.13e- 4 338376   
## 2 ENSG00000189052 CGB5          503.          -11.3 7.28e- 4 93659    
## 3 ENSG00000284981 UPK3BL2       350.          -11.8 2.54e- 3 107983993
## 4 ENSG00000228437 LINC02474     410.          -12.0 2.36e- 4 <NA>     
## 5 ENSG00000138271 GPR87         836.          -12.1 1.73e- 9 53836    
## 6 ENSG00000087128 TMPRSS11E     493.          -12.3 9.69e-10 28983

2.2 GO

The input data for Gene Set Enrichment Analysis (GSEA) should be provided as sorted vectors presenting some numeric feature, such as log fold changes, p-values or others. These vectors must be named by the gene IDs. However, different types of analysis may need different IDs. Thus, GO enrichment needs Ensembl IDs, while KEGG enrichment requires Entrez IDs as the gene names.

# Get the sorted log-fold change for GO GSEA 
ensembl_gene_list <- genes_all.df$log2FoldChange

# Add Ensembl IDs as names for GO enrichment
names(ensembl_gene_list) <- genes_all.df$gene_id

# Carry out GO sets enrichment analysis
go_gsea <- gseGO(geneList = ensembl_gene_list,
                 OrgDb = org.Hs.eg.db,
                 pvalueCutoff = 0.05,
                 pAdjustMethod = "BH",
                 keyType = "ENSEMBL")

# Explore result
go_gsea.df <- as.data.frame(go_gsea)
colnames(go_gsea.df)

##  [1] "ID"              "Description"     "setSize"         "enrichmentScore" "NES"             "pvalue"          "p.adjust"        "qvalue"          "rank"            "leading_edge"    "core_enrichment"

go_gsea.df <- go_gsea.df %>% 
  dplyr::select(ID,Description,setSize,enrichmentScore,p.adjust) %>% 
  arrange(p.adjust)
head(go_gsea.df)

##                    ID                                  Description setSize enrichmentScore     p.adjust
## GO:0030534 GO:0030534                               adult behavior     119       0.6417390 8.177397e-08
## GO:0051607 GO:0051607                    defense response to virus     267      -0.5149410 8.177397e-08
## GO:0140546 GO:0140546                 defense response to symbiont     267      -0.5149410 8.177397e-08
## GO:0099003 GO:0099003        vesicle-mediated transport in synapse     201       0.5669958 8.177397e-08
## GO:0050804 GO:0050804 modulation of chemical synaptic transmission     390       0.5137496 8.177397e-08
## GO:0099177 GO:0099177       regulation of trans-synaptic signaling     391       0.5129046 8.177397e-08

# Plot GO enrichment results
dotplot(go_gsea)

# Clean-up
rm(ensembl_gene_list)

2.3 KEGG

KEGG enrichment analysis requires Entrez IDs as the gene names.

# Remove rows with missed entrez ids (appr 20% of rows)
filtered_genes_all.df <- genes_all.df %>% filter(!is.na(ENTREZID))

# Get the sorted log-fold changes for GSEA
entrez_gene_list <- filtered_genes_all.df$log2FoldChange

# Add ENTREZID as names for KEGG enrichment
names(entrez_gene_list) <- filtered_genes_all.df$ENTREZID

# Carry out KEGG pathways enrichment analysis
kegg_gsea <- gseKEGG(geneList = entrez_gene_list,
                     organism = 'hsa',
                     pvalueCutoff = 0.05)

# Explore result
enriched_kegg.df <- as.data.frame(kegg_gsea)
colnames(enriched_kegg.df)

##  [1] "ID"              "Description"     "setSize"         "enrichmentScore" "NES"             "pvalue"          "p.adjust"        "qvalue"          "rank"            "leading_edge"    "core_enrichment"

enriched_kegg.df <- enriched_kegg.df %>% 
  dplyr::select(ID,Description,setSize,enrichmentScore,p.adjust) %>% 
  arrange(p.adjust)
head(enriched_kegg.df)

##                ID                            Description setSize enrichmentScore     p.adjust
## hsa04060 hsa04060 Cytokine-cytokine receptor interaction     179      -0.5128795 1.677375e-05
## hsa05323 hsa05323                   Rheumatoid arthritis      67      -0.6477949 9.634365e-05
## hsa04725 hsa04725                    Cholinergic synapse      94       0.6108013 1.537186e-04
## hsa04668 hsa04668                  TNF signaling pathway     108      -0.5569894 1.537186e-04
## hsa05164 hsa05164                            Influenza A     127      -0.5271519 1.537186e-04
## hsa04621 hsa04621    NOD-like receptor signaling pathway     141      -0.5035455 1.606471e-04

# Plot enrichment results
dotplot(kegg_gsea)

# Clean-up
rm(filtered_genes_all.df, entrez_gene_list)

3. Over-representation

3.1 Select Differentially Expressed Genes

Over-representation analysis requires a list of Differentially Expressed Genes (DEGs).

# Select DEGs with
# arbitrary criteria: 
# padj < 0.01 and Fold Change > 2x
degs_0.01_2x_entrez.df <- genes_all.df %>% 
  filter(padj <= 0.01, abs(log2FoldChange) >= 1)

dim(degs_0.01_2x_entrez.df)

## [1] 1415    6

head(degs_0.01_2x_entrez.df)

## # A tibble: 6 × 6
##   gene_id         gene_name       baseMean log2FoldChange     padj ENTREZID 
##   <chr>           <chr>              <dbl>          <dbl>    <dbl> <chr>    
## 1 ENSG00000117154 IGSF21              930.           13.4 2.05e- 5 84966    
## 2 ENSG00000101180 HRH3                696.           13.0 2.14e- 6 11255    
## 3 ENSG00000149295 DRD2               2428.           12.9 4.32e- 6 1813     
## 4 ENSG00000185518 SV2B               2208.           12.7 7.36e- 7 9899     
## 5 ENSG00000286190 ENSG00000286190    1027.           12.6 6.57e-19 728392   
## 6 ENSG00000286190 ENSG00000286190    1027.           12.6 6.57e-19 124900388

tail(degs_0.01_2x_entrez.df)

## # A tibble: 6 × 6
##   gene_id         gene_name baseMean log2FoldChange     padj ENTREZID 
##   <chr>           <chr>        <dbl>          <dbl>    <dbl> <chr>    
## 1 ENSG00000184995 IFNE          239.          -11.3 4.13e- 4 338376   
## 2 ENSG00000189052 CGB5          503.          -11.3 7.28e- 4 93659    
## 3 ENSG00000284981 UPK3BL2       350.          -11.8 2.54e- 3 107983993
## 4 ENSG00000228437 LINC02474     410.          -12.0 2.36e- 4 <NA>     
## 5 ENSG00000138271 GPR87         836.          -12.1 1.73e- 9 53836    
## 6 ENSG00000087128 TMPRSS11E     493.          -12.3 9.69e-10 28983

3.2 GO

# Create list of DEGs' Ensembl IDs
degs_ensembl_gene_list <- degs_0.01_2x_entrez.df$gene_id

# carry out GO over-representation analysis
go_ora <- enrichGO(gene = degs_ensembl_gene_list,
                   OrgDb = org.Hs.eg.db,
                   pvalueCutoff = 0.05,
                   pAdjustMethod = "BH",
                   keyType = "ENSEMBL")

# Save result to data frame
go_ora.df <- as.data.frame(go_ora)
head(go_ora.df)

##                    ID                                      Description GeneRatio   BgRatio       pvalue     p.adjust       qvalue                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          geneID Count
## GO:0022843 GO:0022843 voltage-gated monoatomic cation channel activity    29/933 160/21059 8.914405e-11 7.693132e-08 6.728030e-08                                                                                                                                                                                                                 ENSG00000177301/ENSG00000075043/ENSG00000153253/ENSG00000182674/ENSG00000138622/ENSG00000132639/ENSG00000151079/ENSG00000006071/ENSG00000148408/ENSG00000075429/ENSG00000173826/ENSG00000177272/ENSG00000183873/ENSG00000164588/ENSG00000055118/ENSG00000135519/ENSG00000171303/ENSG00000129159/ENSG00000171385/ENSG00000162687/ENSG00000184185/ENSG00000151067/ENSG00000196876/ENSG00000184408/ENSG00000153956/ENSG00000069431/ENSG00000121361/ENSG00000170745/ENSG00000175294    29
## GO:0005267 GO:0005267                       potassium channel activity    24/933 128/21059 1.823836e-09 7.869853e-07 6.882582e-07                                                                                                                                                                                                                                                                                                 ENSG00000177301/ENSG00000163873/ENSG00000075043/ENSG00000182674/ENSG00000138622/ENSG00000132639/ENSG00000151079/ENSG00000006071/ENSG00000173826/ENSG00000177272/ENSG00000164588/ENSG00000055118/ENSG00000135519/ENSG00000171303/ENSG00000129159/ENSG00000171385/ENSG00000162687/ENSG00000184185/ENSG00000184408/ENSG00000105737/ENSG00000095209/ENSG00000069431/ENSG00000121361/ENSG00000170745    24
## GO:0005249 GO:0005249         voltage-gated potassium channel activity    21/933 104/21059 4.673300e-09 8.981298e-07 7.854596e-07                                                                                                                                                                                                                                                                                                                                                 ENSG00000177301/ENSG00000075043/ENSG00000182674/ENSG00000138622/ENSG00000132639/ENSG00000151079/ENSG00000006071/ENSG00000173826/ENSG00000177272/ENSG00000164588/ENSG00000055118/ENSG00000135519/ENSG00000171303/ENSG00000129159/ENSG00000171385/ENSG00000162687/ENSG00000184185/ENSG00000184408/ENSG00000069431/ENSG00000121361/ENSG00000170745    21
## GO:0005244 GO:0005244    voltage-gated monoatomic ion channel activity    31/933 214/21059 6.230463e-09 8.981298e-07 7.854596e-07                                                                                                                                                                                 ENSG00000177301/ENSG00000075043/ENSG00000153253/ENSG00000182674/ENSG00000138622/ENSG00000132639/ENSG00000151079/ENSG00000006071/ENSG00000148408/ENSG00000075429/ENSG00000173826/ENSG00000177272/ENSG00000183873/ENSG00000164588/ENSG00000055118/ENSG00000135519/ENSG00000171303/ENSG00000177098/ENSG00000129159/ENSG00000171385/ENSG00000162687/ENSG00000184185/ENSG00000151067/ENSG00000196876/ENSG00000184408/ENSG00000161509/ENSG00000153956/ENSG00000069431/ENSG00000121361/ENSG00000170745/ENSG00000175294    31
## GO:0022832 GO:0022832                   voltage-gated channel activity    31/933 214/21059 6.230463e-09 8.981298e-07 7.854596e-07                                                                                                                                                                                 ENSG00000177301/ENSG00000075043/ENSG00000153253/ENSG00000182674/ENSG00000138622/ENSG00000132639/ENSG00000151079/ENSG00000006071/ENSG00000148408/ENSG00000075429/ENSG00000173826/ENSG00000177272/ENSG00000183873/ENSG00000164588/ENSG00000055118/ENSG00000135519/ENSG00000171303/ENSG00000177098/ENSG00000129159/ENSG00000171385/ENSG00000162687/ENSG00000184185/ENSG00000151067/ENSG00000196876/ENSG00000184408/ENSG00000161509/ENSG00000153956/ENSG00000069431/ENSG00000121361/ENSG00000170745/ENSG00000175294    31
## GO:0022839 GO:0022839            monoatomic ion gated channel activity    42/933 354/21059 6.701677e-09 8.981298e-07 7.854596e-07 ENSG00000177301/ENSG00000163873/ENSG00000075043/ENSG00000153253/ENSG00000182674/ENSG00000138622/ENSG00000132639/ENSG00000120251/ENSG00000151079/ENSG00000006071/ENSG00000148408/ENSG00000175344/ENSG00000075429/ENSG00000173826/ENSG00000160716/ENSG00000177272/ENSG00000130054/ENSG00000183873/ENSG00000164588/ENSG00000055118/ENSG00000152578/ENSG00000135519/ENSG00000171303/ENSG00000177098/ENSG00000129159/ENSG00000171385/ENSG00000162687/ENSG00000184185/ENSG00000151067/ENSG00000196876/ENSG00000184408/ENSG00000161509/ENSG00000119782/ENSG00000153956/ENSG00000105737/ENSG00000069431/ENSG00000121361/ENSG00000167895/ENSG00000102287/ENSG00000170745/ENSG00000103534/ENSG00000175294    42

# Plot top results
dotplot(go_ora)

# Clean-up
rm(degs_ensembl_gene_list)

3.3 KEGG

# Create list of Entrez gene IDs
degs_entrez_gene_list <- degs_0.01_2x_entrez.df$ENTREZID

# carry out pathway over-representation analysis
kegg_ora <- enrichKEGG(gene = degs_entrez_gene_list,
                       organism = 'hsa',
                       pvalueCutoff = 0.05,
                       pAdjustMethod = "BH")


# Save result to data frame
kegg_ora.df <- as.data.frame(kegg_ora)
head(kegg_ora.df)

##                                      category                         subcategory       ID                             Description GeneRatio  BgRatio       pvalue     p.adjust       qvalue                                                                                                                                                                geneID Count
## hsa04512 Environmental Information Processing Signaling molecules and interaction hsa04512                ECM-receptor interaction    17/429  89/8842 9.999040e-07 0.0003029709 0.0002705004                                                                                  9899/9900/1288/5649/3680/6382/6385/3339/3912/1101/3691/7057/3914/3909/3694/3371/3918    17
## hsa05146                       Human Diseases       Infectious disease: parasitic hsa05146                              Amoebiasis    15/429 103/8842 1.221142e-04 0.0126009528 0.0112504599                                                                                               1288/5579/384/5296/81/3912/9630/7097/2921/3576/3914/3909/2920/2769/3918    15
## hsa04510                   Cellular Processes     Cellular community - eukaryotes hsa04510                          Focal adhesion    23/429 203/8842 1.247619e-04 0.0126009528 0.0112504599                                                         57144/1288/596/5649/5579/3680/5296/2317/81/824/3912/1101/3691/7057/3914/3909/1956/1950/3694/857/3371/858/3918    23
## hsa04080 Environmental Information Processing Signaling molecules and interaction hsa04080 Neuroactive ligand-receptor interaction    34/429 368/8842 2.120014e-04 0.0160591026 0.0143379863 11255/1813/117/2899/1394/2891/2914/1139/1141/2912/554/4985/2893/1395/6751/1132/2905/2901/7425/2696/5021/5734/718/5724/2564/5618/9002/8698/5028/147/3269/154/2150/5029    34
## hsa04514 Environmental Information Processing Signaling molecules and interaction hsa04514                 Cell adhesion molecules    18/429 157/8842 5.793526e-04 0.0302853477 0.0270395496                                                                     4684/26047/9379/23705/9378/114798/83700/5789/23114/57502/1002/57863/3680/6382/6385/3108/3383/1001    18
## hsa04024 Environmental Information Processing                 Signal transduction hsa04024                  cAMP signaling pathway    23/429 226/8842 5.997099e-04 0.0302853477 0.0270395496                                                    1813/117/108/1394/10021/84152/2891/56670/482/816/2893/1395/4878/6751/775/5296/2905/2696/5021/338442/2353/27198/154    23

# Plot top results
dotplot(kegg_ora)

# Clean-up
rm(degs_entrez_gene_list)

4. Plot a KEGG pathway

TNF signalling (pid=hsa04668) has been found as one of the top enriched KEGG pathways in GSEA analysis. The ggkegg::rawMap function can be used to plot the KEGG pathway diagram with our enriched genes highlighted.

rawMap(kegg_gsea, pid = "hsa04668")

End section

# save image
save.image("Functional_analysis.RData")

# List objects
ls()

##  [1] "base_folder"            "degs_0.01_2x_entrez.df" "enriched_kegg.df"       "genes_all.df"           "go_gsea"                "go_gsea.df"             "go_ora"                 "go_ora.df"              "kegg_category"          "kegg_gsea"              "kegg_ora"               "kegg_ora.df"

# List libraries etc
sessionInfo()

## R version 4.3.1 (2023-06-16 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=English_United Kingdom.utf8  LC_CTYPE=English_United Kingdom.utf8    LC_MONETARY=English_United Kingdom.utf8 LC_NUMERIC=C                            LC_TIME=English_United Kingdom.utf8    
## 
## time zone: Europe/London
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ggkegg_1.0.13          tidygraph_1.3.1        igraph_2.0.3           XML_3.99-0.16.1        ggraph_2.2.1           ggplot2_3.5.1          clusterProfiler_4.10.1 org.Hs.eg.db_3.18.0    AnnotationDbi_1.64.1   IRanges_2.36.0         S4Vectors_0.40.2       Biobase_2.62.0         BiocGenerics_0.48.1    stringi_1.8.3          dplyr_1.1.4            readr_2.1.5           
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3      rstudioapi_0.16.0       jsonlite_1.8.8          magrittr_2.0.3          magick_2.8.3            farver_2.1.1            rmarkdown_2.26          GlobalOptions_0.1.2     fs_1.6.4                zlibbioc_1.48.2         vctrs_0.6.5             memoise_2.0.1           RCurl_1.98-1.14         ggtree_3.10.1           htmltools_0.5.8         curl_5.2.1              gridGraphics_0.5-1      sass_0.4.9              bslib_0.7.0             plyr_1.8.9              cachem_1.0.8            lifecycle_1.0.4         pkgconfig_2.0.3         Matrix_1.6-0            R6_2.5.1                fastmap_1.1.1           gson_0.1.0              GenomeInfoDbData_1.2.11 digest_0.6.33           aplot_0.2.2             enrichplot_1.22.0       colorspace_2.1-0        patchwork_1.2.0         RSQLite_2.3.6           labeling_0.4.3          filelock_1.0.3          fansi_1.0.6             httr_1.4.7              polyclip_1.10-6         compiler_4.3.1          bit64_4.0.5            
##  [42] withr_3.0.0             BiocParallel_1.36.0     viridis_0.6.5           DBI_1.2.2               highr_0.10              ggforce_0.4.2           MASS_7.3-60             rjson_0.2.21            HDO.db_0.99.1           tools_4.3.1             ape_5.8                 scatterpie_0.2.3        glue_1.7.0              nlme_3.1-164            GOSemSim_2.28.1         grid_4.3.1              shadowtext_0.1.3        reshape2_1.4.4          snow_0.4-4              fgsea_1.28.0            generics_0.1.3          gtable_0.3.5            tzdb_0.4.0              tidyr_1.3.1             data.table_1.15.4       hms_1.1.3               utf8_1.2.4              XVector_0.42.0          ggrepel_0.9.5           pillar_1.9.0            stringr_1.5.1           vroom_1.6.5             yulab.utils_0.1.4       splines_4.3.1           tweenr_2.0.3            BiocFileCache_2.10.2    treeio_1.26.0           lattice_0.22-6          bit_4.0.5               tidyselect_1.2.1        GO.db_3.18.0           
##  [83] Biostrings_2.70.3       knitr_1.46              gridExtra_2.3           xfun_0.43               graphlayouts_1.1.1      lazyeval_0.2.2          ggfun_0.1.5             yaml_2.3.8              evaluate_0.23           codetools_0.2-20        tibble_3.2.1            qvalue_2.34.0           ggplotify_0.1.2         cli_3.6.1               munsell_0.5.1           jquerylib_0.1.4         Rcpp_1.0.12             GenomeInfoDb_1.38.8     dbplyr_2.5.0            png_0.1-8               parallel_4.3.1          blob_1.2.4              DOSE_3.28.2             bitops_1.0-7            viridisLite_0.4.2       tidytree_0.4.6          scales_1.3.0            purrr_1.0.2             crayon_1.5.2            GetoptLong_1.0.5        rlang_1.1.1             cowplot_1.1.3           fastmatch_1.1-4         KEGGREST_1.42.0

# Time stamp
Sys.time()

## [1] "2024-06-26 16:22:10 BST"

Over-representation and enrichment analysis: GO and KEGG

EBI Cancer Genomics & Transcriptomics 2024

Alexey Larionov

27 June 2024