library(volcalc)
library(dplyr) #for left_join()
#>
#> Attaching package: 'dplyr'
#> The following object is masked from 'package:ChemmineR':
#>
#> groups
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
The `volcalc` package can be used to download .mol files directly from KEGG given either compound IDs or pathway IDs.
First, choose a directory to download files to. For this vignette, we will use a temporary directory, but you should choose somewhere in your project.
# Directory the .mol files are downloaded to. The session's temporary
# directory is used only for this vignette; use a project directory in real work.
dl_path <- tempdir()
Single compound usage
You can search KEGG for compounds at https://www.genome.jp/kegg/compound/ to find their KEGG IDs, which start with a “C”.
Let’s download .mol files for two compounds, jasmonic acid and methyl
jasmonate, with KEGG IDs C08491 and C11512, respectively, using
the `volcalc` function `get_mol_kegg()`.
# Fetch the .mol files for jasmonic acid (C08491) and methyl jasmonate
# (C11512) into the download directory, then print the resulting table.
mols <- get_mol_kegg(
  compound_ids = c("C08491", "C11512"),
  dir = dl_path
)
mols
#> # A tibble: 2 × 2
#> compound_id mol_path
#> <chr> <fs::path>
#> 1 C08491 /var/folders/wr/by_lst2d2fngf67mknmgf4340000gn/T/RtmpKwerlt/C08491.mol
#> 2 C11512 /var/folders/wr/by_lst2d2fngf67mknmgf4340000gn/T/RtmpKwerlt/C11512.mol
The data frame returned by `get_mol_kegg()` contains the paths the files
were downloaded to in the `mol_path` column, which makes it convenient to
pass them on to the `volcalc` function `calc_vol()`.
# Estimate volatility for each downloaded .mol file, then print the result.
rvi <- calc_vol(mols[["mol_path"]])
rvi
#> # A tibble: 2 × 5
#> mol_path formula name rvi category
#> <chr> <chr> <chr> <dbl> <fct>
#> 1 /var/folders/wr/by_lst2d2fngf67mknmgf4340000gn/T/RtmpKwerlt/C08491.mol C12H18O3 (-)-Jasmonic acid 1.84 moderate
#> 2 /var/folders/wr/by_lst2d2fngf67mknmgf4340000gn/T/RtmpKwerlt/C11512.mol C13H20O3 Methyl jasmonate 3.81 high
`calc_vol()` also returns the file paths, so these two
data frames can be easily joined on `mol_path`.
Pathway usage
We can download single or multiple compounds with `compound_ids`, but we
can also download all compounds associated with a KEGG pathway with
`pathway_ids`. Let’s download the entire alpha-linolenic acid metabolism
pathway (map00592) that the above two compounds are part of.
# Download every compound in the alpha-linolenic acid metabolism pathway
# (map00592) and preview the first rows of the returned table.
alam_pathway <- get_mol_kegg(
  pathway_ids = "map00592",
  dir = dl_path
)
head(alam_pathway)
#> # A tibble: 6 × 3
#> pathway_id compound_id mol_path
#> <chr> <chr> <fs::path>
#> 1 map00592 C00157 /var/folders/wr/by_lst2d2fngf67mknmgf4340000gn/T/RtmpKwerlt/map00592/C00157.mol
#> 2 map00592 C01226 /var/folders/wr/by_lst2d2fngf67mknmgf4340000gn/T/RtmpKwerlt/map00592/C01226.mol
#> 3 map00592 C04672 /var/folders/wr/by_lst2d2fngf67mknmgf4340000gn/T/RtmpKwerlt/map00592/C04672.mol
#> 4 map00592 C04780 /var/folders/wr/by_lst2d2fngf67mknmgf4340000gn/T/RtmpKwerlt/map00592/C04780.mol
#> 5 map00592 C04785 /var/folders/wr/by_lst2d2fngf67mknmgf4340000gn/T/RtmpKwerlt/map00592/C04785.mol
#> 6 map00592 C06427 /var/folders/wr/by_lst2d2fngf67mknmgf4340000gn/T/RtmpKwerlt/map00592/C06427.mol
# Size of the pathway table: number of rows, then number of columns.
dim(alam_pathway)
#> [1] 44 3
Notice that this returns pathway IDs as well as compound IDs. As above, we
can pass the `mol_path` column to `calc_vol()`, join the resulting data
frame back to the pathway table, and do some basic data wrangling to find
the top 10 most volatile compounds in that pathway.
# Estimate volatility for every compound in the pathway. Files flagged with
# possible OpenBabel errors come back as NAs (see the warning printed below).
rvi_path <- calc_vol(alam_pathway[["mol_path"]])
#> Warning in FUN(X[[i]], ...): Possible OpenBabel errors detected and only NAs returned.
#> Run with `validate = FALSE` to ignore this.
# Attach the volatility estimates to the pathway table by file path, then
# keep the ten compounds with the highest RVI.
alam_pathway %>%
  left_join(rvi_path, by = join_by(mol_path)) %>%
  select(-mol_path) %>%
  # order from most to least volatile
  arrange(desc(rvi)) %>%
  # keep only the first 10 rows
  slice_head(n = 10)
#> # A tibble: 10 × 6
#> pathway_id compound_id formula name rvi category
#> <chr> <chr> <chr> <chr> <dbl> <fct>
#> 1 map00592 C16310 C6H10O 3-Hexenal 7.32 high
#> 2 map00592 C19757 C8H14O2 (3Z)-Hex-3-en-1-yl acetate 6.75 high
#> 3 map00592 C08492 C6H12O 3-Hexenol 6.45 high
#> 4 map00592 C16323 C9H14O 3,6-Nonadienal 6.05 high
#> 5 map00592 C11512 C13H20O3 Methyl jasmonate 3.81 high
#> 6 map00592 C16318 C13H20O3 (+)-7-Isomethyljasmonate 3.81 high
#> 7 map00592 C16322 C9H16O3 9-Oxononanoic acid 2.77 high
#> 8 map00592 C16343 C17H28O Heptadecatrienal 2.69 high
#> 9 map00592 C08491 C12H18O3 (-)-Jasmonic acid 1.84 moderate
#> 10 map00592 C16317 C12H18O3 (+)-7-Isojasmonic acid 1.84 moderate