Data extraction from Neo4j database

Toru Maruyama

2023-12-05

library(CellInteractomeR)
library(neo2R)
library(kableExtra)
## ℹ Loading CellInteractomeR

Connect to the database

  • url: http://localhost:7474 is the default URL of a local Neo4j server
  • username / password: replace the placeholder values below with your own Neo4j credentials
con = connect_database(
  url = "http://localhost:7474",
  username = "neo4j",
  password = "yourpassword"
)
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## ℹ Please use `as_tibble()` instead.
## ℹ The signature and semantics have changed, see `?as_tibble`.
## ℹ The deprecated feature was likely used in the CellInteractomeR package.
##   Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Get basic information about the graph

  • get_model(): Graph model
  • get_entity_types(): Types of entities (nodes) in the graph
  • get_relation_types(): Types of relationships (edges) in the graph
  • get_entities(): List of entities
  • get_entity_property_names(): List of property (attribute) types for the entities
  • get_relation_property_names(): List of property (attribute) types for the relationships
  • get_entity_properties(): Properties (attributes) for the entities
  • get_relation_properties(): Properties (attributes) for the relationships
  • get_states(): List of states defined in the graph
  • get_relation_paths(): Paths of relationships among entities

get_model(): Graph model

model = get_model(con)
model %>%
  kable() %>%
  kable_classic_2()
from to relation_type directed
Metabolite Microbe UPTAKE TRUE
DiffTest Gene TESTED FALSE
Cell Gene SPECIFICALLY_EXPRESS FALSE
Metabolite Gene RECEPTOR TRUE
Microbe Metabolite PRODUCE TRUE
Microbe Gene MOLECULAR_MIMICRY TRUE
Cell Cell LIGAND_RECEPTOR_COUNT TRUE
Gene Gene LIGAND_RECEPTOR TRUE
Gene Metabolite ENZYME TRUE
Cell DiffTest DIFFERENTIAL_EXPRESSION FALSE
DiffTest Microbe DIFFERENTIAL_ABUNDANCE FALSE
Cell DiffTest DIFFERENTIAL_ABUNDANCE FALSE
DiffTest Metabolite DIFFERENTIAL_ABUNDANCE FALSE
Cell Metabolite CORRELATE_WITH FALSE
Cell Cell CORRELATE_WITH FALSE
Metabolite Microbe CORRELATE_WITH FALSE
Microbe Microbe CORRELATE_WITH FALSE
Cell Microbe CORRELATE_WITH FALSE

get_entity_types()/get_relation_types(): Entity/Relationship types

get_entity_types(con)
## [1] "Cell"       "DiffTest"   "Gene"       "Metabolite" "Microbe"   
## [6] "State"
get_relation_types(con)
##  [1] "CORRELATE_WITH"          "TESTED"                 
##  [3] "DIFFERENTIAL_ABUNDANCE"  "SPECIFICALLY_EXPRESS"   
##  [5] "LIGAND_RECEPTOR_COUNT"   "DIFFERENTIAL_EXPRESSION"
##  [7] "PRODUCE"                 "UPTAKE"                 
##  [9] "ENZYME"                  "RECEPTOR"               
## [11] "MOLECULAR_MIMICRY"       "LIGAND_RECEPTOR"

get_entity_property_names()/get_relation_property_names(): List of property (attribute) names for the entities/relationships

get_entity_property_names(con, entity_type="Cell")
## [1] "CellType"      "CellTypeGroup" "id"            "name"
get_relation_property_names(con, relation_type="DIFFERENTIAL_ABUNDANCE")
## [1] "directed"  "Disease"   "fdr"       "logfc"     "method"    "pvalue"   
## [7] "reference" "study"

get_entity_properties()/get_relation_properties(): Values of a property (attribute) for the entities/relationships

get_entity_properties(con, "Cell", "CellType")
##  [1] NA                         "Naive B"                 
##  [3] "Stromal 2"                "LYVE1+ macrophage"       
##  [5] "APOE+ macrophage"         "Memory B"                
##  [7] "Inflammatory fibroblast"  "IgA plasma"              
##  [9] "Non-classical monocyte"   "CD4+ tissue-Tcm"         
## [11] "Cycling EC"               "BEST4+ epithelial"       
## [13] "CD4+ Trm"                 "Adult glia"              
## [15] "Stromal 1"                "IgA+IgG+ plasma"         
## [17] "GC B"                     "Cycling TA"              
## [19] "CD4+ blood-Tcm"           "pDC"                     
## [21] "Adult venous EC (C7+)"    "CD4+ Treg"               
## [23] "Reticular fibroblast"     "CD8+ Tn"                 
## [25] "Mast"                     "Cycling GC B"            
## [27] "NCR+ ILC3"                "Myofibroblast 2"         
## [29] "DUOX2+ epithelial"        "Adult arterial capillary"
## [31] "IgA-IgG- plasma"          "Immature pericyte"       
## [33] "CD8+ activated T"         "Cycling macrophage"      
## [35] "CD4+ Tfh"                 "Adult arterial EC"       
## [37] "Cycling plasma"           "Paneth"                  
## [39] "CD4+ activated T"         "CD16+ NK"                
## [41] "Adult colonocyte"         "CD56+ SELL_low NK"       
## [43] "Enteroendocrine"          "Goblet"                  
## [45] "cDC2"                     "LEC"                     
## [47] "Pediatric colonocyte"     "Enterocyte"              
## [49] "CD8+ IEL"                 "Adult venous EC (SELE+)" 
## [51] "CD56+ SELL_high NK"       "SMC 1"                   
## [53] "Fetal venous capillary"   "CD4+ Th17"               
## [55] "Mature pericyte"          "cDC1"                    
## [57] "CD8+ Trm"                 "CD4+ Tn"                 
## [59] "CD8+ Teff"                "IgG plasma"              
## [61] "CD8+ Tc17"                "CD8+ Tem"                
## [63] "Transitional stromal"     "Classical monocyte"      
## [65] "CD8+ Tcm"                 "CD4+ Temra"              
## [67] "Tuft"                     "CD8+ MAIT"               
## [69] "AREG+ macrophage"         "LAMP3+ DC"               
## [71] "Inflammatory monocyte"    "TA"
get_relation_properties(con, "DIFFERENTIAL_ABUNDANCE", "Disease")
## [1] "CD" "UC"

get_states(): List of states in the graph

get_states(con) %>%
  kable() %>%
  kable_classic_2()
Disease
CD
UC
HC

get_relation_paths(): Paths of relationships among entities

paths = get_relation_paths(con)
names(paths)
##  [1] "Metabolite - UPTAKE - Microbe"                                                        
##  [2] "Cell - SPECIFICALLY_EXPRESS - Gene"                                                   
##  [3] "Metabolite - RECEPTOR - Gene"                                                         
##  [4] "Microbe - PRODUCE - Metabolite"                                                       
##  [5] "Microbe - MOLECULAR_MIMICRY - Gene"                                                   
##  [6] "Cell - LIGAND_RECEPTOR_COUNT - Cell"                                                  
##  [7] "Gene - LIGAND_RECEPTOR - Gene"                                                        
##  [8] "Gene - ENZYME - Metabolite"                                                           
##  [9] "Cell - CORRELATE_WITH - Metabolite"                                                   
## [10] "Cell - CORRELATE_WITH - Cell"                                                         
## [11] "Metabolite - CORRELATE_WITH - Microbe"                                                
## [12] "Microbe - CORRELATE_WITH - Microbe"                                                   
## [13] "Cell - CORRELATE_WITH - Microbe"                                                      
## [14] "Cell - CORRELATE_WITH - Metabolite - RECEPTOR - Gene"                                 
## [15] "Cell - CORRELATE_WITH - Metabolite - ENZYME - Gene"                                   
## [16] "Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite"                           
## [17] "Cell - SPECIFICALLY_EXPRESS - Gene - ENZYME - Metabolite"                             
## [18] "Cell - CORRELATE_WITH - Metabolite - UPTAKE - Microbe"                                
## [19] "Cell - CORRELATE_WITH - Metabolite - PRODUCE - Microbe"                               
## [20] "Cell - CORRELATE_WITH - Metabolite - CORRELATE_WITH - Microbe"                        
## [21] "Cell - CORRELATE_WITH - Metabolite - RECEPTOR - Gene - MOLECULAR_MIMICRY - Microbe"   
## [22] "Cell - CORRELATE_WITH - Metabolite - ENZYME - Gene - MOLECULAR_MIMICRY - Microbe"     
## [23] "Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite - UPTAKE - Microbe"        
## [24] "Cell - SPECIFICALLY_EXPRESS - Gene - ENZYME - Metabolite - UPTAKE - Microbe"          
## [25] "Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite - PRODUCE - Microbe"       
## [26] "Cell - SPECIFICALLY_EXPRESS - Gene - ENZYME - Metabolite - PRODUCE - Microbe"         
## [27] "Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite - CORRELATE_WITH - Microbe"
## [28] "Cell - SPECIFICALLY_EXPRESS - Gene - ENZYME - Metabolite - CORRELATE_WITH - Microbe"  
## [29] "Cell - SPECIFICALLY_EXPRESS - Gene - MOLECULAR_MIMICRY - Microbe"                     
## [30] "Gene - RECEPTOR - Metabolite - UPTAKE - Microbe"                                      
## [31] "Gene - ENZYME - Metabolite - UPTAKE - Microbe"                                        
## [32] "Gene - RECEPTOR - Metabolite - PRODUCE - Microbe"                                     
## [33] "Gene - ENZYME - Metabolite - PRODUCE - Microbe"                                       
## [34] "Gene - RECEPTOR - Metabolite - CORRELATE_WITH - Microbe"                              
## [35] "Gene - ENZYME - Metabolite - CORRELATE_WITH - Microbe"                                
## [36] "Metabolite - RECEPTOR - Gene - MOLECULAR_MIMICRY - Microbe"                           
## [37] "Metabolite - ENZYME - Gene - MOLECULAR_MIMICRY - Microbe"
paths$`Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite` %>%
  kable() %>%
  kable_classic_2()
from to relation_type directed
Cell Gene SPECIFICALLY_EXPRESS FALSE
Gene Metabolite RECEPTOR TRUE

Data retrieval

Search with Cypher query

The object con returned by connect_database() can be used to query the database with the neo2R library (https://github.com/patzaw/neo2R). You can retrieve data from the Neo4j database with Cypher queries as follows.

query = "
  MATCH (c: Cell)-[s: SPECIFICALLY_EXPRESS]-(g: Gene)-[r: RECEPTOR]-(m: Metabolite)
  WHERE c.id='BEST4+ epithelial'
  AND s.Disease='UC'
  RETURN c.id as Cell, g.id as Receptor_gene, m.id as Metabolite
  LIMIT 30
"
result = neo2R::cypher(con, query)
result %>%
  kable() %>%
  kable_classic_2()
Cell Receptor_gene Metabolite
BEST4+ epithelial SLC27A3 adrenate
BEST4+ epithelial SLC27A3 eicosatrienoate
BEST4+ epithelial SLC27A3 linoleate
BEST4+ epithelial SLC27A3 docosahexaenoate
BEST4+ epithelial SLC27A3 eicosapentaenoate
BEST4+ epithelial SLC27A3 alpha-linolenate
BEST4+ epithelial SLC27A3 nervonic acid
BEST4+ epithelial SLC27A3 13-docosenoate
BEST4+ epithelial SLC27A3 eicosenoate
BEST4+ epithelial SLC27A3 arachidate
BEST4+ epithelial SLC27A3 nonadecanoate
BEST4+ epithelial SLC27A3 oleate
BEST4+ epithelial SLC27A3 stearate
BEST4+ epithelial SLC27A3 heptadecanoate
BEST4+ epithelial SLC27A3 palmitoleate
BEST4+ epithelial SLC27A3 palmitate
BEST4+ epithelial SLC27A3 pentadecanoate
BEST4+ epithelial SLC27A3 myristate
BEST4+ epithelial SLC27A3 dodecanoate
BEST4+ epithelial SLC27A3 caprate
BEST4+ epithelial SLC27A3 caprylate
BEST4+ epithelial SLC27A3 heptanoate
BEST4+ epithelial SLC27A3 caproate
BEST4+ epithelial SLC27A3 butyrate
BEST4+ epithelial FABP1 adrenate
BEST4+ epithelial FABP1 eicosatrienoate
BEST4+ epithelial FABP1 linoleate
BEST4+ epithelial FABP1 docosahexaenoate
BEST4+ epithelial FABP1 eicosapentaenoate
BEST4+ epithelial FABP1 alpha-linolenate

Automatically generate Cypher Query with functions

CellInteractomeR provides functions that generate Cypher queries for you, so that deep knowledge of the Cypher query language is not required. The following functions are currently available.

  • query_single_entity(): Create query to get network from an entity of interest
  • query_multi_entity(): Create query to get network among entities of interest
  • query_search_by_path(): Create query to search cascades

query_single_entity()

query = query_single_entity(
  paths = c("Cell - SPECIFICALLY_EXPRESS - Gene"), # select from output of `get_relation_paths()`
  target_entity_class = "Cell",
  target_entity_name = "BEST4+ epithelial",
  states = list(Disease = c("UC")),
  config,
  threshold
)
cat(query)
## MATCH (e1: Cell)-[r1: SPECIFICALLY_EXPRESS]-(e2: Gene)
## WHERE e1.id='BEST4+ epithelial'
## AND ((r1.expression >= 0.000000) OR (r1.expression is NULL))
## AND ((r1.FDR <= 0.200000) OR (r1.FDR is NULL))
## AND ((r1.logFC >= 1.000000) OR (r1.logFC is NULL))
## AND ((r1.method IN ['wilcoxon']) OR (r1.method is NULL))
## AND ((r1.rank <= 3.000000) OR (r1.rank is NULL))
## AND ((r1.study IN ['IBDMDB']) OR (r1.study is NULL))
## AND ((r1.Disease IN ['UC']) OR (r1.Disease is NULL))
## RETURN e1, e2, r1

The diff argument restricts the result to entities that are differentially abundant in a specific condition.

# this option selects entities that are more abundant in UC than in control (logFC >= 0)
diff = list(type="above", logFC=0, Disease="UC")

query = query_single_entity(
  paths = c("Cell - CORRELATE_WITH - Cell"), # select from output of `get_relation_paths()`
  target_entity_class = "Cell",
  target_entity_name = "BEST4+ epithelial",
  states = list(Disease = c("UC")),
  config,
  threshold,
  diff = diff
)
cat(query)
## MATCH (e1: Cell)-[r1: CORRELATE_WITH]-(e2: Cell)
## MATCH (e2)-[diff2:DIFFERENTIAL_ABUNDANCE]-()
## WHERE e1.id='BEST4+ epithelial'
## AND ((r1.method IN ['Pearson', 'Spearman']) OR (r1.method is NULL))
## AND ((r1.study IN ['IBDMDB']) OR (r1.study is NULL))
## AND ((r1.value >= 0.250000) OR (r1.value is NULL))
## AND ((r1.Disease IN ['UC']) OR (r1.Disease is NULL))
## AND diff2.logfc >= 0
## AND diff2.Disease='UC' 
## RETURN e1, e2, r1
result = neo2R::cypher(con, query, result="graph")
result$nodes[[1]]
## $id
## [1] "93"
## 
## $elementId
## [1] "4:e7c7ced8-5961-47bd-a9b2-cb6254fbc4c0:93"
## 
## $labels
## $labels[[1]]
## [1] "Cell"
## 
## 
## $properties
## $properties$CellType
## [1] "BEST4+ epithelial"
## 
## $properties$id
## [1] "BEST4+ epithelial"
## 
## $properties$CellTypeGroup
## [1] "Epithelial"
result$relationships[[1]]
## $id
## [1] "63131"
## 
## $elementId
## [1] "5:e7c7ced8-5961-47bd-a9b2-cb6254fbc4c0:63131"
## 
## $type
## [1] "CORRELATE_WITH"
## 
## $startNode
## [1] "93"
## 
## $startNodeElementId
## [1] "4:e7c7ced8-5961-47bd-a9b2-cb6254fbc4c0:93"
## 
## $endNode
## [1] "110"
## 
## $endNodeElementId
## [1] "4:e7c7ced8-5961-47bd-a9b2-cb6254fbc4c0:110"
## 
## $properties
## $properties$directed
## [1] "False"
## 
## $properties$study
## [1] "IBDMDB"
## 
## $properties$method
## [1] "Pearson"
## 
## $properties$Disease
## [1] "UC"
## 
## $properties$value
## [1] 0.598486

query_multi_entity()

cells = get_entity_properties(con, "Cell", "id") %>% na.omit()
cells = cells[1:10]

query = query_multi_entity(
  paths = c("Cell - CORRELATE_WITH - Cell"), # select from output of `get_relation_paths()`
  target_entities = list(Cell = cells),
  states = list(Disease = c("UC")),
  config,
  threshold
)
cat(query)
## MATCH (e1: Cell)-[r1: CORRELATE_WITH]-(e2: Cell)
## WHERE e1.id IN ['Naive B', 'Stromal 2', 'LYVE1+ macrophage', 'APOE+ macrophage', 'Memory B', 'Inflammatory fibroblast', 'IgA plasma', 'Non-classical monocyte', 'CD4+ tissue-Tcm', 'Cycling EC']
## AND e2.id IN ['Naive B', 'Stromal 2', 'LYVE1+ macrophage', 'APOE+ macrophage', 'Memory B', 'Inflammatory fibroblast', 'IgA plasma', 'Non-classical monocyte', 'CD4+ tissue-Tcm', 'Cycling EC']
## AND ((r1.method IN ['Pearson', 'Spearman']) OR (r1.method is NULL))
## AND ((r1.study IN ['IBDMDB']) OR (r1.study is NULL))
## AND ((r1.value >= 0.250000) OR (r1.value is NULL))
## AND ((r1.Disease IN ['UC']) OR (r1.Disease is NULL))
## RETURN e1, e2, r1

The diff argument restricts the result to entities that are differentially abundant in a specific condition.

# this option selects entities that are more abundant in UC than in control (logFC >= 0)
diff = list(type="above", logFC=0, Disease="UC")

# use all cell types as target entities (no subsetting this time)
cells = get_entity_properties(con, "Cell", "id") %>% na.omit()

query = query_multi_entity(
  paths = c("Cell - CORRELATE_WITH - Cell"), # select from output of `get_relation_paths()`
  target_entities = list(Cell = cells),
  states = list(Disease = c("UC")),
  config,
  threshold,
  diff = diff
)
cat(query)
## MATCH (e1: Cell)-[r1: CORRELATE_WITH]-(e2: Cell)
## MATCH (e1)-[diff1:DIFFERENTIAL_ABUNDANCE]-()
## MATCH (e2)-[diff2:DIFFERENTIAL_ABUNDANCE]-()
## WHERE e1.id IN ['Naive B', 'Stromal 2', 'LYVE1+ macrophage', 'APOE+ macrophage', 'Memory B', 'Inflammatory fibroblast', 'IgA plasma', 'Non-classical monocyte', 'CD4+ tissue-Tcm', 'Cycling EC', 'BEST4+ epithelial', 'CD4+ Trm', 'Adult glia', 'Stromal 1', 'IgA+IgG+ plasma', 'GC B', 'Cycling TA', 'CD4+ blood-Tcm', 'pDC', 'Adult venous EC (C7+)', 'CD4+ Treg', 'Reticular fibroblast', 'CD8+ Tn', 'Mast', 'Cycling GC B', 'NCR+ ILC3', 'Myofibroblast 2', 'DUOX2+ epithelial', 'Adult arterial capillary', 'IgA-IgG- plasma', 'Immature pericyte', 'CD8+ activated T', 'Cycling macrophage', 'CD4+ Tfh', 'Adult arterial EC', 'Cycling plasma', 'Paneth', 'CD4+ activated T', 'CD16+ NK', 'Adult colonocyte', 'CD56+ SELL_low NK', 'Enteroendocrine', 'Goblet', 'cDC2', 'LEC', 'Pediatric colonocyte', 'Enterocyte', 'CD8+ IEL', 'Adult venous EC (SELE+)', 'CD56+ SELL_high NK', 'SMC 1', 'Fetal venous capillary', 'CD4+ Th17', 'Mature pericyte', 'cDC1', 'CD8+ Trm', 'CD4+ Tn', 'CD8+ Teff', 'IgG plasma', 'CD8+ Tc17', 'CD8+ Tem', 'Transitional stromal', 'Classical monocyte', 'CD8+ Tcm', 'CD4+ Temra', 'Tuft', 'CD8+ MAIT', 'AREG+ macrophage', 'LAMP3+ DC', 'Inflammatory monocyte', 'TA']
## AND e2.id IN ['Naive B', 'Stromal 2', 'LYVE1+ macrophage', 'APOE+ macrophage', 'Memory B', 'Inflammatory fibroblast', 'IgA plasma', 'Non-classical monocyte', 'CD4+ tissue-Tcm', 'Cycling EC', 'BEST4+ epithelial', 'CD4+ Trm', 'Adult glia', 'Stromal 1', 'IgA+IgG+ plasma', 'GC B', 'Cycling TA', 'CD4+ blood-Tcm', 'pDC', 'Adult venous EC (C7+)', 'CD4+ Treg', 'Reticular fibroblast', 'CD8+ Tn', 'Mast', 'Cycling GC B', 'NCR+ ILC3', 'Myofibroblast 2', 'DUOX2+ epithelial', 'Adult arterial capillary', 'IgA-IgG- plasma', 'Immature pericyte', 'CD8+ activated T', 'Cycling macrophage', 'CD4+ Tfh', 'Adult arterial EC', 'Cycling plasma', 'Paneth', 'CD4+ activated T', 'CD16+ NK', 'Adult colonocyte', 'CD56+ SELL_low NK', 'Enteroendocrine', 'Goblet', 'cDC2', 'LEC', 'Pediatric colonocyte', 'Enterocyte', 'CD8+ IEL', 'Adult venous EC (SELE+)', 'CD56+ SELL_high NK', 'SMC 1', 'Fetal venous capillary', 'CD4+ Th17', 'Mature pericyte', 'cDC1', 'CD8+ Trm', 'CD4+ Tn', 'CD8+ Teff', 'IgG plasma', 'CD8+ Tc17', 'CD8+ Tem', 'Transitional stromal', 'Classical monocyte', 'CD8+ Tcm', 'CD4+ Temra', 'Tuft', 'CD8+ MAIT', 'AREG+ macrophage', 'LAMP3+ DC', 'Inflammatory monocyte', 'TA']
## AND ((r1.method IN ['Pearson', 'Spearman']) OR (r1.method is NULL))
## AND ((r1.study IN ['IBDMDB']) OR (r1.study is NULL))
## AND ((r1.value >= 0.250000) OR (r1.value is NULL))
## AND ((r1.Disease IN ['UC']) OR (r1.Disease is NULL))
## AND diff1.logfc >= 0
## AND diff1.Disease='UC' 
## AND diff2.logfc >= 0
## AND diff2.Disease='UC' 
## RETURN e1, e2, r1
result = neo2R::cypher(con, query, result="graph")
result$nodes[[1]]
## $id
## [1] "115"
## 
## $elementId
## [1] "4:e7c7ced8-5961-47bd-a9b2-cb6254fbc4c0:115"
## 
## $labels
## $labels[[1]]
## [1] "Cell"
## 
## 
## $properties
## $properties$CellType
## [1] "Cycling macrophage"
## 
## $properties$id
## [1] "Cycling macrophage"
## 
## $properties$CellTypeGroup
## [1] "Myeloid"

query_search_by_path()

relation_path_df = config$relation_path_dfs$`Cell - SPECIFICALLY_EXPRESS - Gene - RECEPTOR - Metabolite - PRODUCE - Microbe`
relation_path_df %>% kbl(row.names=F) %>% kable_classic_2()
from to relation_type directed
Cell Gene SPECIFICALLY_EXPRESS FALSE
Gene Metabolite RECEPTOR TRUE
Metabolite Microbe PRODUCE TRUE
query = query_search_by_path(
  relation_path_df,
  target_entity = "BEST4+ epithelial",
  states = list(Disease = c("UC")),
  config,
  threshold
)
cat(query)
## MATCH (e1: Cell)-[r1: SPECIFICALLY_EXPRESS]-(e2: Gene)
## MATCH (e2: Gene)-[r2: RECEPTOR]-(e3: Metabolite)
## MATCH (e3: Metabolite)-[r3: PRODUCE]-(e4: Microbe)
## WHERE e1.id='BEST4+ epithelial'
## AND ((r1.expression >= 0.000000) OR (r1.expression is NULL))
## AND ((r1.FDR <= 0.200000) OR (r1.FDR is NULL))
## AND ((r1.logFC >= 1.000000) OR (r1.logFC is NULL))
## AND ((r1.method IN ['wilcoxon']) OR (r1.method is NULL))
## AND ((r1.rank <= 3.000000) OR (r1.rank is NULL))
## AND ((r1.study IN ['IBDMDB']) OR (r1.study is NULL))
## AND ((r3.n >= 1.000000) OR (r3.n is NULL))
## AND ((r3.source IN ['gutMGene', 'NJC19', 'AGORA2']) OR (r3.source is NULL))
## AND ((r3.total >= 1.000000) OR (r3.total is NULL))
## AND ((r3.value >= 0.250000) OR (r3.value is NULL))
## AND ((r2.source IN ['HMDB', 'GPCRdb', 'Human1']) OR (r2.source is NULL))
## AND ((r1.Disease IN ['UC']) OR (r1.Disease is NULL))
## AND ((r2.Disease IN ['UC']) OR (r2.Disease is NULL))
## AND ((r3.Disease IN ['UC']) OR (r3.Disease is NULL))
## RETURN e1, e2, e3, e4, r1, r2, r3
# same search, additionally restricted with diff to entities with logFC <= 0 in UC
query = query_search_by_path(
  relation_path_df,
  target_entity = "BEST4+ epithelial",
  states = list(Disease = c("UC")),
  config,
  threshold,
  diff=list(logFC=0, type="below", Disease="UC")
)
cat(query)
## MATCH (e1: Cell)-[r1: SPECIFICALLY_EXPRESS]-(e2: Gene)
## MATCH (e2: Gene)-[r2: RECEPTOR]-(e3: Metabolite)
## MATCH (e3: Metabolite)-[r3: PRODUCE]-(e4: Microbe)
## MATCH (e3)-[diff3:DIFFERENTIAL_ABUNDANCE]-()
## MATCH (e4)-[diff4:DIFFERENTIAL_ABUNDANCE]-()
## WHERE e1.id='BEST4+ epithelial'
## AND ((r1.expression >= 0.000000) OR (r1.expression is NULL))
## AND ((r1.FDR <= 0.200000) OR (r1.FDR is NULL))
## AND ((r1.logFC >= 1.000000) OR (r1.logFC is NULL))
## AND ((r1.method IN ['wilcoxon']) OR (r1.method is NULL))
## AND ((r1.rank <= 3.000000) OR (r1.rank is NULL))
## AND ((r1.study IN ['IBDMDB']) OR (r1.study is NULL))
## AND ((r3.n >= 1.000000) OR (r3.n is NULL))
## AND ((r3.source IN ['gutMGene', 'NJC19', 'AGORA2']) OR (r3.source is NULL))
## AND ((r3.total >= 1.000000) OR (r3.total is NULL))
## AND ((r3.value >= 0.250000) OR (r3.value is NULL))
## AND ((r2.source IN ['HMDB', 'GPCRdb', 'Human1']) OR (r2.source is NULL))
## AND ((r1.Disease IN ['UC']) OR (r1.Disease is NULL))
## AND ((r2.Disease IN ['UC']) OR (r2.Disease is NULL))
## AND ((r3.Disease IN ['UC']) OR (r3.Disease is NULL))
## AND diff3.logfc <= 0
## AND diff3.Disease='UC' 
## AND diff4.logfc <= 0
## AND diff4.Disease='UC' 
## RETURN e1, e2, e3, e4, r1, r2, r3
result = neo2R::cypher(con, query, result="graph")
result$nodes[[1]]
## $id
## [1] "950"
## 
## $elementId
## [1] "4:e7c7ced8-5961-47bd-a9b2-cb6254fbc4c0:950"
## 
## $labels
## $labels[[1]]
## [1] "Microbe"
## 
## 
## $properties
## $properties$phylum_name
## [1] "Bacillota"
## 
## $properties$class_taxid
## [1] "186801"
## 
## $properties$species_name
## [1] "Christensenella minuta"
## 
## $properties$genus_name
## [1] "Christensenella"
## 
## $properties$family_taxid
## [1] "186802"
## 
## $properties$order_name
## [1] "Christensenellaceae"
## 
## $properties$species_taxid
## [1] "626937"
## 
## $properties$order_taxid
## [1] "990719"
## 
## $properties$taxid
## [1] "626937"
## 
## $properties$rank
## [1] "species"
## 
## $properties$phylum_taxid
## [1] "1239"
## 
## $properties$id
## [1] "Christensenella minuta"
## 
## $properties$family_name
## [1] "Eubacteriales"
## 
## $properties$genus_taxid
## [1] "990721"
## 
## $properties$class_name
## [1] "Clostridia"
result$relationships[[1]]
## $id
## [1] "79276"
## 
## $elementId
## [1] "5:e7c7ced8-5961-47bd-a9b2-cb6254fbc4c0:79276"
## 
## $type
## [1] "SPECIFICALLY_EXPRESS"
## 
## $startNode
## [1] "93"
## 
## $startNodeElementId
## [1] "4:e7c7ced8-5961-47bd-a9b2-cb6254fbc4c0:93"
## 
## $endNode
## [1] "3328"
## 
## $endNodeElementId
## [1] "4:e7c7ced8-5961-47bd-a9b2-cb6254fbc4c0:3328"
## 
## $properties
## $properties$directed
## [1] "False"
## 
## $properties$study
## [1] "IBDMDB"
## 
## $properties$expression
## [1] 3.05688
## 
## $properties$logFC
## [1] 6.028984
## 
## $properties$method
## [1] "wilcoxon"
## 
## $properties$Disease
## [1] "UC"
## 
## $properties$FDR
## [1] 4.510815e-44
## 
## $properties$rank
## [1] 3