Incorporate relational knowledge in a dataset

# Import dataset
data("Tengeler2020", package = "mia")
tse <- Tengeler2020

tse <- agglomerateByRank(tse, rank = "Genus")
#> Duplicated labels were made unique.
tse <- transformAssay(tse, method = "relabundance")

tse
#> class: TreeSummarizedExperiment 
#> dim: 49 27 
#> metadata(1): agglomerated_by_rank
#> assays(2): counts relabundance
#> rownames(49): [Clostridium]_innocuum_group
#>   [Eubacterium]_coprostanoligenes_group ... uncultured_2
#>   uncultured_bacterium
#> rowData names(6): Kingdom Phylum ... Family Genus
#> colnames(27): A110 A12 ... A35 A38
#> colData names(4): patient_status cohort patient_status_vs_cohort
#>   sample_name
#> reducedDimNames(0):
#> mainExpName: NULL
#> altExpNames(0):
#> rowLinks: a LinkDataFrame (49 rows)
#> rowTree: 1 phylo tree(s) (49 leaves)
#> colLinks: NULL
#> colTree: NULL

Single-module analysis

data("butyrate", package = "ariadne")

butyrate
#>                   taxname    cpd cpd.name
#> 1           Butyricimonas C00246 butyrate
#> 2             Odoribacter C00246 butyrate
#> 3            Agathobacter C00246 butyrate
#> 4         Anaerobutyricum C00246 butyrate
#> 5  Eubacterium ventriosum C00246 butyrate
#> 6            Anaerostipes C00246 butyrate
#> 7            Butyrivibrio C00246 butyrate
#> 8             Coprococcus C00246 butyrate
#> 9               Roseburia C00246 butyrate
#> 10         Shuttleworthia C00246 butyrate
#> 11         Butyricicoccus C00246 butyrate
#> 12       Faecalibacterium C00246 butyrate
#> 13         Flavonifractor C00246 butyrate
#> 14   Pseudoflavonifractor C00246 butyrate
#> 15          Oscillibacter C00246 butyrate
#> 16        Subdoligranulum C00246 butyrate
tse <- addModules(tse, butyrate, key = "Genus", as = "names")

modules <- agglomerateByModule(tse, by = "rows", group = "butyrate")

modules
#> class: TreeSummarizedExperiment 
#> dim: 1 27 
#> metadata(1): agglomerated_by_rank
#> assays(2): counts relabundance
#> rownames(1): butyrate
#> rowData names(0):
#> colnames(27): A110 A12 ... A35 A38
#> colData names(4): patient_status cohort patient_status_vs_cohort
#>   sample_name
#> reducedDimNames(0):
#> mainExpName: NULL
#> altExpNames(0):
#> rowLinks: NULL
#> rowTree: NULL
#> colLinks: NULL
#> colTree: NULL
plotAbundance(modules)
#> Warning: Removed 2 rows containing missing values or values outside the scale range
#> (`geom_bar()`).

plotBoxplot(modules, assay.type = "relabundance", group.by = "patient_status")

Multi-module analysis

graph <- ariadne()

searchPath(graph, taxname ~ bugsig)
#> Path 1:
#> taxname -(BugSigDB)-> bugsig
tax2bugsig <- weavePath(graph, taxname ~ bugsig, init = rowData(tse)$Genus)
#> taxname -(BugSigDB)-> bugsig
#> Warning: names for 871 bugsig ids not found.

head(tax2bugsig)
#>            taxname       bugsig                                 bugsig.name
#> 1      Bacteroides 20603222/1/1 severely-autistic_vs_non-sibling-control_UP
#> 2  Parabacteroides 20603222/1/1 severely-autistic_vs_non-sibling-control_UP
#> 3      Bacteroides 20613793/1/2                    Case-1_vs_Control-1_DOWN
#> 4  Parabacteroides 20613793/1/2                    Case-1_vs_Control-1_DOWN
#> 5      Bacteroides 20613793/2/1                      Case-2_vs_Control-2_UP
#> 6 Faecalibacterium 20613793/2/1                      Case-2_vs_Control-2_UP
mod.names <- levels(tax2bugsig$bugsig.name)

adhd.mods <- mod.names[grepl("ADHD", mod.names)]

adhd.mods
#> [1] "ADHD_vs_Controls_DOWN"                                                               
#> [2] "ADHD_vs_Controls_UP"                                                                 
#> [3] "ADHD_vs_healthy-control_DOWN"                                                        
#> [4] "Medicated-ADHD-(ADHD-+-MPH)_vs_Unmedicated-ADHD_DOWN"                                
#> [5] "Medicated-ADHD-(ADHD-+-MPH)_vs_Unmedicated-ADHD_UP"                                  
#> [6] "Unmedicated-Attention-deficit-hyperactivity-disorder-(ADHD)_vs_Healthy-controls_DOWN"
tse <- addModules(tse, tax2bugsig, key = "Genus", as = "names")
#> Warning in dcast.data.table(modules, widen_form, value.var = "cov", fill = 0):
#> 'fun.aggregate' is NULL, but found duplicate row/column combinations, so
#> defaulting to length(). That is, the variables [taxname, bugsig.name] used in
#> 'formula' do not uniquely identify rows in the input 'data'. In such cases,
#> 'fun.aggregate' is used to derive a single representative value for each
#> combination in the output data.table, for example by summing or averaging
#> (fun.aggregate=sum or fun.aggregate=mean, respectively). Check the resulting
#> table for values larger than 1 to see which combinations were not unique. See
#> ?dcast.data.table for more details.

modules <- agglomerateByModule(tse, by = "rows", group = adhd.mods)

modules
#> class: TreeSummarizedExperiment 
#> dim: 6 27 
#> metadata(1): agglomerated_by_rank
#> assays(2): counts relabundance
#> rownames(6): ADHD_vs_Controls_DOWN ADHD_vs_Controls_UP ...
#>   Medicated-ADHD-(ADHD-+-MPH)_vs_Unmedicated-ADHD_UP
#>   Unmedicated-Attention-deficit-hyperactivity-disorder-(ADHD)_vs_Healthy-controls_DOWN
#> rowData names(0):
#> colnames(27): A110 A12 ... A35 A38
#> colData names(4): patient_status cohort patient_status_vs_cohort
#>   sample_name
#> reducedDimNames(0):
#> mainExpName: NULL
#> altExpNames(0):
#> rowLinks: NULL
#> rowTree: NULL
#> colLinks: NULL
#> colTree: NULL
plotAbundance(modules)
#> Warning: Removed 37 rows containing missing values or values outside the scale range
#> (`geom_bar()`).

plotAbundanceDensity(
    modules,
    assay.type = "relabundance",
    layout = "density",
    colour.by = "patient_status"
)

idx <- apply(rowData(tse)[adhd.mods], 2L, function(x) which(x != 0L))

to_keep <- lengths(idx) != 0L
idx <- idx[to_keep]
adhd.mods <- adhd.mods[to_keep]

res <- lapply(
    seq_along(adhd.mods),
    function(i) plotAbundance(tse[idx[[i]], ]) + ggtitle(adhd.mods[i])
)
library(patchwork)
wrap_plots(res)
#> Warning: Removed 5 rows containing missing values or values outside the scale range
#> (`geom_bar()`).
#> Warning: Removed 16 rows containing missing values or values outside the scale range
#> (`geom_bar()`).
#> Warning: Removed 18 rows containing missing values or values outside the scale range
#> (`geom_bar()`).
#> Warning: Removed 12 rows containing missing values or values outside the scale range
#> (`geom_bar()`).
#> Warning: Removed 19 rows containing missing values or values outside the scale range
#> (`geom_bar()`).
#> Warning: Removed 46 rows containing missing values or values outside the scale range
#> (`geom_bar()`).

Reproducibility

R session information:

#> R version 4.6.0 (2026-04-24)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.4 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats4    stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#>  [1] patchwork_1.3.2                 miaViz_1.21.0                   ggraph_2.2.2                   
#>  [4] ggplot2_4.0.3                   mia_1.19.8                      TreeSummarizedExperiment_2.21.0
#>  [7] Biostrings_2.81.1               XVector_0.53.0                  SingleCellExperiment_1.35.0    
#> [10] MultiAssayExperiment_1.39.0     SummarizedExperiment_1.43.0     Biobase_2.73.1                 
#> [13] GenomicRanges_1.65.0            Seqinfo_1.3.0                   IRanges_2.47.0                 
#> [16] S4Vectors_0.51.1                BiocGenerics_0.59.0             generics_0.1.4                 
#> [19] MatrixGenerics_1.25.0           matrixStats_1.5.0               ariadne_0.2.3                  
#> [22] BiocStyle_2.41.0               
#> 
#> loaded via a namespace (and not attached):
#>   [1] splines_4.6.0               MultiFactor_0.1.2           ggplotify_0.1.3             filelock_1.0.3             
#>   [5] tibble_3.3.1                polyclip_1.10-7             XML_3.99-0.23               DirichletMultinomial_1.55.0
#>   [9] lifecycle_1.0.5             httr2_1.2.2                 vroom_1.7.1                 lattice_0.22-9             
#>  [13] MASS_7.3-65                 SnowballC_0.7.1             magrittr_2.0.5              sass_0.4.10                
#>  [17] rmarkdown_2.31              jquerylib_0.1.4             yaml_2.3.12                 otel_0.2.0                 
#>  [21] DBI_1.3.0                   RColorBrewer_1.1-3          abind_1.4-8                 purrr_1.2.2                
#>  [25] yulab.utils_0.2.4           tweenr_2.0.3                rappdirs_0.3.4              gdtools_0.5.0              
#>  [29] ggrepel_0.9.8               tokenizers_0.3.0            irlba_2.3.7                 tidytree_0.4.7             
#>  [33] rentrez_1.2.4               vegan_2.7-3                 pkgdown_2.2.0               permute_0.9-10             
#>  [37] DelayedMatrixStats_1.35.0   codetools_0.2-20            DelayedArray_0.39.1         scuttle_1.23.0             
#>  [41] ggforce_0.5.0               tidyselect_1.2.1            aplot_0.2.9                 farver_2.1.2               
#>  [45] ScaledMatrix_1.21.0         viridis_0.6.5               BiocFileCache_3.3.0         jsonlite_2.0.0             
#>  [49] BiocNeighbors_2.7.2         decontam_1.33.0             tidygraph_1.3.1             scater_1.41.1              
#>  [53] systemfonts_1.3.2           tools_4.6.0                 ggnewscale_0.5.2            progress_1.2.3             
#>  [57] treeio_1.37.0               ragg_1.5.2                  Rcpp_1.1.1-1.1              glue_1.8.1                 
#>  [61] gridExtra_2.3               SparseArray_1.13.2          xfun_0.57                   mgcv_1.9-4                 
#>  [65] dplyr_1.2.1                 withr_3.0.2                 BiocManager_1.30.27         fastmap_1.2.0              
#>  [69] bluster_1.23.0              digest_0.6.39               rsvd_1.0.5                  R6_2.6.1                   
#>  [73] gridGraphics_0.5-1          textshaping_1.0.5           RSQLite_3.52.0              ecodive_2.2.6              
#>  [77] tidyr_1.3.2                 fontLiberation_0.1.0        data.table_1.18.4           DECIPHER_3.9.0             
#>  [81] prettyunits_1.2.0           graphlayouts_1.2.3          httr_1.4.8                  htmlwidgets_1.6.4          
#>  [85] S4Arrays_1.13.0             pkgconfig_2.0.3             gtable_0.3.6                blob_1.3.0                 
#>  [89] S7_0.2.2                    janeaustenr_1.0.0           htmltools_0.5.9             fontBitstreamVera_0.1.1    
#>  [93] bookdown_0.46               scales_1.4.0                png_0.1-9                   ggfun_0.2.0                
#>  [97] knitr_1.51                  tzdb_0.5.0                  reshape2_1.4.5              rncl_0.8.9                 
#> [101] nlme_3.1-169                curl_7.1.0                  cachem_1.1.0                stringr_1.6.0              
#> [105] parallel_4.6.0              vipor_0.4.7                 arrow_24.0.0                desc_1.4.3                 
#> [109] pillar_1.11.1               grid_4.6.0                  vctrs_0.7.3                 BiocSingular_1.29.0        
#> [113] dbplyr_2.5.2                beachmat_2.29.0             cluster_2.1.8.2             beeswarm_0.4.0             
#> [117] evaluate_1.0.5              readr_2.2.0                 cli_3.6.6                   compiler_4.6.0             
#> [121] rlang_1.2.0                 crayon_1.5.3                tidytext_0.4.3              labeling_0.4.3             
#> [125] plyr_1.8.9                  fs_2.1.0                    ggbeeswarm_0.7.3            ggiraph_0.9.6              
#> [129] stringi_1.8.7               viridisLite_0.4.3           BiocParallel_1.47.0         assertthat_0.2.1           
#> [133] lazyeval_0.2.3              fontquiver_0.2.1            Matrix_1.7-5                hms_1.1.4                  
#> [137] sparseMatrixStats_1.25.0    bit64_4.8.0                 KEGGREST_1.53.0             igraph_2.3.1               
#> [141] memoise_2.0.1               bslib_0.10.0                rotl_3.1.1                  ggtree_4.3.0               
#> [145] bit_4.6.0                   ape_5.8-1