Graph-based entity-set enrichment analysis

Toru Maruyama

2023-12-10

library(CellInteractomeR)
library(neo2R)
library(kableExtra)
library(tidyverse)
library(igraph)
library(ggraph)
## ℹ Loading CellInteractomeR
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%--%()      masks CellInteractomeR::%--%()
## ✖ dplyr::as_data_frame() masks tibble::as_data_frame(), CellInteractomeR::as_data_frame()
## ✖ purrr::compose()       masks CellInteractomeR::compose()
## ✖ tidyr::crossing()      masks CellInteractomeR::crossing()
## ✖ dplyr::filter()        masks CellInteractomeR::filter(), stats::filter()
## ✖ dplyr::group_rows()    masks kableExtra::group_rows()
## ✖ dplyr::lag()           masks stats::lag()
## ✖ lubridate::show()      masks CellInteractomeR::show(), methods::show()
## ✖ purrr::simplify()      masks CellInteractomeR::simplify()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## 
## Attaching package: 'igraph'
## 
## 
## The following objects are masked from 'package:lubridate':
## 
##     %--%, union
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## 
## 
## The following objects are masked from 'package:purrr':
## 
##     compose, simplify
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     crossing
## 
## 
## The following object is masked from 'package:tibble':
## 
##     as_data_frame
## 
## 
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## 
## 
## The following object is masked from 'package:base':
## 
##     union

Connect database

  • url: http://localhost:7474 is the default URL set in Neo4j
  • please use your username and password instead
con = connect_database(
  url = "http://localhost:7474",
  username = "neo4j",
  password = "yourpassword"
)
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## ℹ Please use `as_tibble()` instead.
## ℹ The signature and semantics have changed, see `?as_tibble`.
## ℹ The deprecated feature was likely used in the CellInteractomeR package.
##   Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Graph-based entity-set enrichment analysis (GrapeSEA)

Example: Find characteristics of disease-associated metabolites

Loading data

The following codes load stool metabolome/microbiome data from IBDMDB (Lloyd-Price et al. 2019. Nat Med).

# Example file: Downloaded from http://cellinteractome.com/download/IBD.tar.gz
indir = "~/Cell-Interactome/test/IBD/"

# metadata
df_meta = read.table(paste0(indir, "metadata.SampleMap.txt"), sep="\t", header=T, stringsAsFactors=F) %>%
  left_join(read.table(paste0(indir, "metadata.Sample.txt"), sep="\t", header=T, stringsAsFactors=F, check.names=F), by="SampleID") %>%
  left_join(read.table(paste0(indir, "metadata.Patient.txt"), sep="\t", header=T, stringsAsFactors=F, check.names=F), by="PatientID")

# microbiome
df_mic = read.table(paste0(indir, "microbiome.Ratio.txt"), sep="\t", quote="", header=T, row.names=1, stringsAsFactors=F)
df_mic_UC = df_mic[,df_meta %>% filter(datatype == "microbiome" & `Disease[category]` == "UC") %>% pull(DataID)] # Ulcerative colitis
df_mic_CD = df_mic[,df_meta %>% filter(datatype == "microbiome" & `Disease[category]` == "CD") %>% pull(DataID)] # Crohn's diesase
df_mic_HC = df_mic[,df_meta %>% filter(datatype == "microbiome" & `Disease[category]` == "HC") %>% pull(DataID)]  # Healthy control

# metabolome
df_met = read.table(paste0(indir, "metabolome.PPM.txt"), sep="\t", quote="", header=T, row.names=1, stringsAsFactors=F)
df_met = log2(df_met + 1)
df_met_UC = df_met[,df_meta %>% filter(datatype == "metabolome" & `Disease[category]` == "UC") %>% pull(DataID)] # Ulcerative colitis
df_met_CD = df_met[,df_meta %>% filter(datatype == "metabolome" & `Disease[category]` == "CD") %>% pull(DataID)] # Crohn's diesase
df_met_HC = df_met[,df_meta %>% filter(datatype == "metabolome" & `Disease[category]` == "HC") %>% pull(DataID)]  # Healthy control

# each matrix has abundance information of metabolites/microbes (row) in UC/CD/HC samples (columns)
df_met_UC[1:5, 1:5] %>% kbl() %>% kable_styling()
CSM5FZ48 CSM5FZ4A CSM5FZ4O CSM5MCTZ CSM5MCXB
1-3-7-trimethylurate 5.9883797 5.2926950 6.1368738 1.509103 0.0881997
1-methylguanine 2.1952139 0.9073365 1.6111354 3.415876 3.2256712
1-methylguanosine 0.4529227 0.3294265 0.4166074 0.144726 0.0290270
1-methylhistamine 1.6104255 1.6102796 0.3499537 2.470050 0.0055073
1-methylnicotinamide 0.8148608 2.2736943 0.4262779 1.632002 0.1140218

Differential analysis

This section defines microbes / metabolites differentially abundant in disease conditions.

diff_tests = function(df1, df2, method="t"){
  df_diff = sapply(rownames(df1), function(m){
    x = df1[m, ] %>% as.numeric()
    y = df2[m, ] %>% as.numeric()
    if(method == "t"){
      p = suppressWarnings(t.test(x, y))$p.value
    }else if(method == "wilcox"){
      p = suppressWarnings(wilcox.test(x, y))$p.value
    }
    return(c(mean(x), mean(y), p))
  })
  df_diff = data.frame(t(df_diff))
  colnames(df_diff) = c("Group1", "Group2", "pvalue")
  df_diff = df_diff %>% filter(Group1 > 0 | Group2 > 0) %>% rownames_to_column("id")
  df_diff
}

df_diff_mic_UC = diff_tests(df_mic_UC, df_mic_HC, method="wilcox")
df_diff_mic_CD = diff_tests(df_mic_CD, df_mic_HC, method="wilcox")
df_diff_met_UC = diff_tests(df_met_UC, df_met_HC, method="t")
df_diff_met_CD = diff_tests(df_met_CD, df_met_HC, method="t")
df_diff_met_CD %>% head() %>% kbl() %>% kable_styling()
id Group1 Group2 pvalue
1-3-7-trimethylurate 3.1945227 5.1411523 0.0074653
1-methylguanine 2.7449415 1.6200099 0.0105038
1-methylguanosine 0.5737536 0.3495421 0.1974040
1-methylhistamine 1.4454821 0.6337684 0.0063992
1-methylnicotinamide 1.7323209 0.8318183 0.0071143
1.2.3.4-tetrahydro-beta-carboline-1.3-dicarboxylate 2.1491791 1.6775408 0.3639148
metabolites_up = df_diff_met_CD %>%
  mutate(logFC = Group1 - Group2) %>%
  filter(logFC > 1 & pvalue < 0.05) %>%
  pull(id)

metabolites_down = df_diff_met_CD %>%
  mutate(logFC = Group2 - Group1) %>%
  filter(logFC > 0) %>%
  pull(id)

tmp = cbind(
  df_met_CD[c(metabolites_up, metabolites_down), ],
  df_met_HC[c(metabolites_up, metabolites_down), ]
)
tmp = t(scale(t(tmp)))

figmat = tmp %>%
  as.data.frame() %>%
  rownames_to_column("ID") %>%
  mutate(Group=ifelse(ID %in% metabolites_up, "High in CD", "Low in CD")) %>%
  gather(key="Sample", value="Value", -Group, -ID) %>%
  mutate(SampleGroup=ifelse(Sample %in% colnames(df_met_HC), "HC", "CD"),
         `Scaled value` = ifelse(Value > 2, 2, ifelse(Value < -2, -2, Value))) %>%
  mutate(SampleGroup = factor(SampleGroup, levels=c("CD", "HC")),
         Group = factor(Group, levels=c("High in CD", "Low in CD")))

figA = ggplot(figmat) +
  geom_tile(aes(x=Sample, y=ID, fill=`Scaled value`)) +
  facet_grid(Group~SampleGroup, scales="free", switch="both", space="free") + 
  scale_fill_gradient2(high="#ED7D31", low="#3284BF", mid="white", limits=c(-2,2)) +
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        legend.title = element_text(angle=-90, hjust=0.5)) + 
  xlab("Sample") + ylab("Metabolites") +
  guides(fill=guide_colorbar(barwidth = 0.7, title.position="right"))
figA

GrapeSEA on metabolites abundant in CD

gs_met = graph_based_enrichment(
  metabolites_up, 
  entity_class = "Metabolite", 
  path = "Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite", 
  background = metabolites_down,
  con = con, 
  config = config, 
  threshold = threshold
)
## Warning in if (is.na(background)) {: the condition has length > 1 and only the
## first element will be used
gs_met %>% filter(pvalue < 0.1) %>% arrange(desc(OR))
##                        pair
## 1     Inflammatory monocyte
## 2            Cycling plasma
## 3                 CD8+ Tc17
## 4                 CD8+ MAIT
## 5                       pDC
## 6         LYVE1+ macrophage
## 7          AREG+ macrophage
## 8           Enteroendocrine
## 9    Non-classical monocyte
## 10        DUOX2+ epithelial
## 11               Enterocyte
## 12                   Goblet
## 13         Adult colonocyte
## 14        BEST4+ epithelial
## 15        Immature pericyte
## 16  Adult venous EC (SELE+)
## 17 Adult arterial capillary
## 18                Stromal 1
## 19                   Paneth
## 20               Cycling TA
##                                                          path target_TRUE
## 1  Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           8
## 2  Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           8
## 3  Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           8
## 4  Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           8
## 5  Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           8
## 6  Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite          11
## 7  Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           9
## 8  Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite          10
## 9  Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           8
## 10 Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           8
## 11 Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           5
## 12 Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           5
## 13 Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           8
## 14 Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           6
## 15 Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           4
## 16 Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           5
## 17 Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           3
## 18 Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           3
## 19 Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           3
## 20 Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           8
##    target_FALSE background_TRUE background_FALSE         OR       pvalue
## 1            12               0               27        Inf 4.005947e-04
## 2            12               0               27        Inf 4.005947e-04
## 3            12               0               27        Inf 4.005947e-04
## 4            12               0               27        Inf 4.005947e-04
## 5            12               0               27        Inf 4.005947e-04
## 6             9               1               26 31.7777778 8.920124e-05
## 7            11               1               26 21.2727273 9.114745e-04
## 8            10               2               25 12.5000000 1.663001e-03
## 9            12               2               25  8.3333333 1.107970e-02
## 10           12              18                9  0.3333333 8.371072e-02
## 11           15              15               12  0.2666667 4.352323e-02
## 12           15              15               12  0.2666667 4.352323e-02
## 13           12              20                7  0.2333333 3.427615e-02
## 14           14              18                9  0.2142857 1.893028e-02
## 15           16              15               12  0.2000000 1.823400e-02
## 16           15              17               10  0.1960784 1.736274e-02
## 17           17              15               12  0.1411765 6.446923e-03
## 18           17              15               12  0.1411765 6.446923e-03
## 19           17              15               12  0.1411765 6.446923e-03
## 20           12              23                4  0.1159420 1.870810e-03
##            fdr
## 1  0.002537100
## 2  0.002537100
## 3  0.002537100
## 4  0.002537100
## 5  0.002537100
## 6  0.002537100
## 7  0.004948005
## 8  0.007898976
## 9  0.032386818
## 10 0.159050367
## 11 0.087046454
## 12 0.087046454
## 13 0.076617281
## 14 0.044959411
## 15 0.044959411
## 16 0.044959411
## 17 0.020415255
## 18 0.020415255
## 19 0.020415255
## 20 0.007898976
tmp = get_entities(con, "Cell", return_class=T)
tmp = tibble(CellGroup=names(tmp), Cell=tmp) %>% unnest(Cell)
cell2group = tmp$CellGroup; names(cell2group) = tmp$Cell

cell2color = RColorBrewer::brewer.pal(9, "Set3")
names(cell2color) = tmp$CellGroup %>% unique()

p = 0.2
cell_orders = gs_met %>% filter(!is.null(OR) & pvalue < p) %>% arrange(OR) %>% pull(pair)

figmat = gs_met %>% 
  filter(!is.null(OR) & pvalue < p) %>%
  dplyr::rename(Cell = pair) %>%
  mutate(Cell = factor(Cell, levels=Cell)) %>%
  mutate(target_TRUE_2 = target_TRUE / (target_TRUE + target_FALSE),
         target_FALSE_2 = target_FALSE / (target_TRUE + target_FALSE),
         background_TRUE_2 = background_TRUE / (background_TRUE + background_FALSE),
         background_FALSE_2 = background_FALSE / (background_TRUE + background_FALSE)) %>%
  dplyr::select(Cell, target_TRUE_2, target_FALSE_2, background_TRUE_2, background_FALSE_2) %>%
  gather(key="Group", value="Count", -Cell) %>%
  separate(Group, sep="_", into=c("Group", "Has_relation", "tmp")) %>%
  mutate(Group = ifelse(Group == "target", "Up", "Down"),
         CellGroup = cell2group[as.character(Cell)]) %>%
  mutate(Cell = factor(Cell, levels=cell_orders))

fig1 = ggplot(figmat %>% dplyr::select(Cell, CellGroup) %>% unique()) +
  geom_tile(aes(x=1, y=Cell, fill=CellGroup)) + 
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        legend.title = element_text(angle=-90, hjust=0.5)) +
  #ggsci::scale_fill_npg() +
  scale_fill_manual(values=cell2color) +
  xlab(NULL) + ylab(NULL) +
  guides(fill=guide_legend(title="", keywidth = 0.8, title.position="right"))

fig2 = ggplot(figmat %>% filter((Group == "Up") & (Has_relation == "TRUE"))) +
  geom_bar(aes(x=Count, y=Cell), stat="identity", fill="grey60") +
  theme_minimal() +
  theme(axis.text.y = element_blank(), 
        plot.title=element_text(size=10, hjust=0.5)) +
  scale_x_reverse(limits=c(.8, 0)) +
  ggtitle("Metabolites high in CD") +
  xlab(NULL) + ylab(NULL)

fig3 = ggplot(figmat %>% filter((Group == "Down") & (Has_relation == "TRUE"))) +
  geom_bar(aes(x=Count, y=Cell), stat="identity", fill="grey60") +
  theme_minimal() +
  theme(axis.text.y = element_blank(), 
        plot.title=element_text(size=10, hjust=0.5)) +
  scale_x_continuous(limits=c(0, .9), breaks=seq(0, 0.8, by=0.2)) +
  ggtitle("Metabolites low in CD") +
  xlab(NULL) + ylab(NULL)

fig_legend = cowplot::get_legend(fig1)
fig1 = fig1 + theme(legend.position = "none")

figB = cowplot::plot_grid(fig1, fig2, fig3, fig_legend, nrow=1, align="h", axis="tb", rel_widths=c(1,1,1,0.7))
figB

path = "Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite"
g = network_multi_entity(
  paths = path, 
  target_entities = list(Metabolite = c(metabolites_down, metabolites_up), Cell = names(cell2group)),
  con = con,
  config = config,
  states = NULL,
  threshold = threshold,
  output = "igraph",
  return_path = T
)

tmp = data.frame(entity=V(g)$name, type=V(g)$type, stringsAsFactors=F) %>% 
  mutate(cell_group = cell2group[entity]) %>%
  mutate(new_name = ifelse(type == "Cell", paste0("Cell: ", cell_group),
                           ifelse(entity %in% metabolites_up, "Metabolite high in CD", "Metabolite low in CD"))) %>%
  mutate(color=ifelse(type == "Cell", cell2color[cell_group], 
                      ifelse((entity %in% metabolites_up), "black", "grey60"))) %>%
  mutate(shape=ifelse(type == "Cell", 16, 18))

V(g)$new_name = tmp$new_name
tmp = tmp %>% select(new_name, color, shape) %>% unique()
name2color = tmp$color; names(name2color) = tmp$new_name
name2shape = tmp$shape; names(name2shape) = tmp$new_name

figC = ggraph(g, layout="igraph", algorithm="kk") + 
  geom_edge_link(color="grey80", alpha=0.3) +
  geom_node_point(aes(color=new_name, shape=new_name), size=4) +
  scale_color_manual(values = name2color) +
  scale_shape_manual(values = name2shape) +
  theme_void() + 
  theme(legend.title = element_blank(),
        plot.title = element_text(hjust=0.5, size=8)) +
  ggtitle(paste0("Associations between Cell & Metabolites: \n", path))
figC
## Warning: Using the `size` aesthetic in this geom was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` in the `default_aes` field and elsewhere instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# fig = cowplot::plot_grid(
#   cowplot::plot_grid(figA, figC, nrow=1, rel_widths=c(3, 4)), 
#   cowplot::plot_grid(figB, NULL, nrow=1, rel_widths=c(7, 0.1)),
#   ncol=1,
#   rel_heights=c(1,1)
# )
# fig

GrapeSEA on metabolites abundant in UC

metabolites_up = df_diff_met_UC %>%
  mutate(logFC = Group1 - Group2) %>%
  filter(logFC > 1 & pvalue < 0.05) %>%
  pull(id)

metabolites_down = df_diff_met_UC %>%
  mutate(logFC = Group2 - Group1) %>%
  filter(logFC > 0) %>%
  pull(id)

tmp = cbind(
  df_met_UC[c(metabolites_up, metabolites_down), ],
  df_met_HC[c(metabolites_up, metabolites_down), ]
)
tmp = t(scale(t(tmp)))

figmat = tmp %>%
  as.data.frame() %>%
  rownames_to_column("ID") %>%
  mutate(Group=ifelse(ID %in% metabolites_up, "High in UC", "Low in UC")) %>%
  gather(key="Sample", value="Value", -Group, -ID) %>%
  mutate(SampleGroup=ifelse(Sample %in% colnames(df_met_HC), "HC", "UC"),
         `Scaled value` = ifelse(Value > 2, 2, ifelse(Value < -2, -2, Value))) %>%
  mutate(SampleGroup = factor(SampleGroup, levels=c("UC", "HC")),
         Group = factor(Group, levels=c("High in UC", "Low in UC")))

figD = ggplot(figmat) +
  geom_tile(aes(x=Sample, y=ID, fill=`Scaled value`)) +
  facet_grid(Group~SampleGroup, scales="free", switch="both", space="free") + 
  scale_fill_gradient2(high="#ED7D31", low="#3284BF", mid="white", limits=c(-2,2)) +
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        legend.title = element_text(angle=-90, hjust=0.5)) + 
  xlab("Sample") + ylab("Metabolites") +
  guides(fill=guide_colorbar(barwidth = 0.7, title.position="right"))

figD

gs_met = graph_based_enrichment(
  metabolites_up, 
  entity_class = "Metabolite", 
  path = "Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite" , 
  background = metabolites_down,
  con = con, 
  config = config, 
  threshold = threshold
)
## Warning in if (is.na(background)) {: the condition has length > 1 and only the
## first element will be used
gs_met %>% filter(pvalue < 0.1) %>% arrange(desc(OR))
##                    pair
## 1     LYVE1+ macrophage
## 2 Inflammatory monocyte
## 3        Cycling plasma
## 4             CD8+ Tc17
## 5             CD8+ MAIT
## 6                   pDC
## 7       Enteroendocrine
## 8            Cycling TA
##                                                         path target_TRUE
## 1 Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           9
## 2 Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           7
## 3 Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           7
## 4 Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           7
## 5 Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           7
## 6 Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           7
## 7 Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           9
## 8 Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite           7
##   target_FALSE background_TRUE background_FALSE        OR     pvalue       fdr
## 1            9               7               28 4.0000000 0.03178645 0.3794572
## 2           11               5               30 3.8181818 0.07988572 0.3794572
## 3           11               5               30 3.8181818 0.07988572 0.3794572
## 4           11               5               30 3.8181818 0.07988572 0.3794572
## 5           11               5               30 3.8181818 0.07988572 0.3794572
## 6           11               5               30 3.8181818 0.07988572 0.3794572
## 7            9               8               27 3.3750000 0.06410844 0.3794572
## 8           11              24               11 0.2916667 0.04588191 0.3794572
tmp = get_entities(con, "Cell", return_class=T)
tmp = tibble(CellGroup=names(tmp), Cell=tmp) %>% unnest(Cell)
cell2group = tmp$CellGroup; names(cell2group) = tmp$Cell

cell2color = RColorBrewer::brewer.pal(9, "Set3")
names(cell2color) = tmp$CellGroup %>% unique()

p = 0.2
cell_orders = gs_met %>% filter(!is.null(OR) & pvalue < p) %>% arrange(OR) %>% pull(pair)

figmat = gs_met %>% 
  filter(!is.null(OR) & pvalue < p) %>%
  dplyr::rename(Cell = pair) %>%
  mutate(Cell = factor(Cell, levels=Cell)) %>%
  mutate(target_TRUE_2 = target_TRUE / (target_TRUE + target_FALSE),
         target_FALSE_2 = target_FALSE / (target_TRUE + target_FALSE),
         background_TRUE_2 = background_TRUE / (background_TRUE + background_FALSE),
         background_FALSE_2 = background_FALSE / (background_TRUE + background_FALSE)) %>%
  dplyr::select(Cell, target_TRUE_2, target_FALSE_2, background_TRUE_2, background_FALSE_2) %>%
  gather(key="Group", value="Count", -Cell) %>%
  separate(Group, sep="_", into=c("Group", "Has_relation", "tmp")) %>%
  mutate(Group = ifelse(Group == "target", "Up", "Down"),
         CellGroup = cell2group[as.character(Cell)]) %>%
  mutate(Cell = factor(Cell, levels=cell_orders))

fig1 = ggplot(figmat %>% dplyr::select(Cell, CellGroup) %>% unique()) +
  geom_tile(aes(x=1, y=Cell, fill=CellGroup)) + 
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        legend.title = element_text(angle=-90, hjust=0.5)) +
  #ggsci::scale_fill_npg() +
  scale_fill_manual(values=cell2color) +
  xlab(NULL) + ylab(NULL) +
  guides(fill=guide_legend(title="", keywidth = 0.8, title.position="right"))

fig2 = ggplot(figmat %>% filter((Group == "Up") & (Has_relation == "TRUE"))) +
  geom_bar(aes(x=Count, y=Cell), stat="identity", fill="grey60") +
  theme_minimal() +
  theme(axis.text.y = element_blank(), 
        plot.title=element_text(size=10, hjust=0.5)) +
  scale_x_reverse(limits=c(.8, 0)) +
  ggtitle("Metabolites high in UC") +
  xlab(NULL) + ylab(NULL)

fig3 = ggplot(figmat %>% filter((Group == "Down") & (Has_relation == "TRUE"))) +
  geom_bar(aes(x=Count, y=Cell), stat="identity", fill="grey60") +
  theme_minimal() +
  theme(axis.text.y = element_blank(), 
        plot.title=element_text(size=10, hjust=0.5)) +
  scale_x_continuous(limits=c(0, .9), breaks=seq(0, 0.8, by=0.2)) +
  ggtitle("Metabolites low in UC") +
  xlab(NULL) + ylab(NULL)

fig_legend = cowplot::get_legend(fig1)
fig1 = fig1 + theme(legend.position = "none")

figE = cowplot::plot_grid(fig1, fig2, fig3, fig_legend, nrow=1, align="h", axis="tb", rel_widths=c(1,1,1,0.7))
figE

path = "Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite"
g = network_multi_entity(
  paths = path, 
  target_entities = list(Metabolite = c(metabolites_down, metabolites_up), Cell = names(cell2group)),
  con = con,
  config = config,
  states = NULL,
  threshold = threshold,
  output = "igraph",
  return_path = T
)

tmp = data.frame(entity=V(g)$name, type=V(g)$type, stringsAsFactors=F) %>% 
  mutate(cell_group = cell2group[entity]) %>%
  mutate(new_name = ifelse(type == "Cell", paste0("Cell: ", cell_group),
                           ifelse(entity %in% metabolites_up, "Metabolite high in UC", "Metabolite low in UC"))) %>%
  mutate(color=ifelse(type == "Cell", cell2color[cell_group], 
                      ifelse((entity %in% metabolites_up), "black", "grey60"))) %>%
  mutate(shape=ifelse(type == "Cell", 16, 18))

V(g)$new_name = tmp$new_name
tmp = tmp %>% select(new_name, color, shape) %>% unique()
name2color = tmp$color; names(name2color) = tmp$new_name
name2shape = tmp$shape; names(name2shape) = tmp$new_name

figF = ggraph(g, layout="igraph", algorithm="kk") + 
  geom_edge_link(color="grey80", alpha=0.3) +
  geom_node_point(aes(color=new_name, shape=new_name), size=4) +
  scale_color_manual(values = name2color) +
  scale_shape_manual(values = name2shape) +
  theme_void() + 
  theme(legend.title = element_blank(),
        plot.title = element_text(hjust=0.5, size=8)) +
  ggtitle(paste0("Associations between Cell & Metabolites: \n", path))
figF