Skip to content

Commit c189cf2

Browse files
Neo4j Extension and Tutorial (#447)
# Motivation Codegen <> Neo4j # Content Adds an extension that allows a codebase graph constructed with codegen to be exported to Neo4j. # Testing Tested by running locally. # Please check the following before marking your PR as ready for review - [x] I have updated the documentation or added new documentation as needed
1 parent af1d08c commit c189cf2

File tree

13 files changed

+407
-7
lines changed

13 files changed

+407
-7
lines changed

docs/images/neo4j-call-graph.png

242 KB
Loading

docs/images/neo4j-class-hierarchy.png

169 KB
Loading

docs/images/neo4j-class-methods.png

160 KB
Loading

docs/images/neo4j-function-calls.png

115 KB
Loading

docs/mint.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,8 @@
101101
"tutorials/fixing-import-loops-in-pytorch",
102102
"tutorials/python2-to-python3",
103103
"tutorials/flask-to-fastapi",
104-
"tutorials/build-mcp"
104+
"tutorials/build-mcp",
105+
"tutorials/neo4j-graph"
105106
]
106107
},
107108
{

docs/tutorials/neo4j-graph.mdx

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
---
2+
title: "Neo4j Graph"
3+
sidebarTitle: "Neo4j Graph"
4+
icon: "database"
5+
iconType: "solid"
6+
---
7+
8+
<Frame caption="Function call graph for a codebase">
9+
<img src="/images/neo4j-call-graph.png" />
10+
</Frame>
11+
12+
# Neo4j Graph
13+
14+
Codegen can export codebase graphs to Neo4j for visualization and analysis.
15+
16+
## Installation
17+
In order to use Neo4j you will need to install it and run it locally using Docker.
18+
19+
### Neo4j
20+
First, install Neo4j using the official [installation guide](https://neo4j.com/docs/desktop-manual/current/installation/download-installation/).
21+
22+
### Docker
23+
To run Neo4j locally using Docker, follow the instructions [here](https://neo4j.com/docs/apoc/current/installation/#docker).
24+
25+
## Launch Neo4j Locally
26+
27+
```bash
28+
docker run \
29+
-p 7474:7474 -p 7687:7687 \
30+
-v $PWD/data:/data -v $PWD/plugins:/plugins \
31+
--name neo4j-apoc \
32+
-e NEO4J_apoc_export_file_enabled=true \
33+
-e NEO4J_apoc_import_file_enabled=true \
34+
-e NEO4J_apoc_import_file_use__neo4j__config=true \
35+
-e NEO4J_PLUGINS=\[\"apoc\"\] \
36+
neo4j:latest
37+
```
38+
## Usage
39+
40+
```python
41+
from codegen import Codebase
42+
from codegen.extensions.graph.main import visualize_codebase
43+
44+
# parse codebase
45+
codebase = Codebase("path/to/codebase")
46+
47+
# export to Neo4j
48+
visualize_codebase(codebase, "bolt://localhost:7687", "neo4j", "password")
49+
```
50+
51+
## Visualization
52+
53+
Once exported, you can open the Neo4j browser at `http://localhost:7474`, sign in with the username `neo4j` and the password `password`, and use the following Cypher queries to visualize the codebase:
54+
55+
### Class Hierarchy
56+
57+
```cypher
58+
Match (s: Class )-[r: INHERITS_FROM*]-> (e:Class) RETURN s, e LIMIT 10
59+
```
60+
<Frame caption="Class hierarchy for a codebase">
61+
<img src="/images/neo4j-class-hierarchy.png" />
62+
</Frame>
63+
64+
### Methods Defined by Each Class
65+
66+
```cypher
67+
Match (s: Class )-[r: DEFINES]-> (e:Method) RETURN s, e LIMIT 10
68+
```
69+
<Frame caption="Methods defined by each class">
70+
<img src="/images/neo4j-class-methods.png" />
71+
</Frame>
72+
73+
### Function Calls
74+
75+
```cypher
76+
Match (s: Func )-[r: CALLS]-> (e:Func) RETURN s, e LIMIT 10
77+
```
78+
79+
<Frame caption="Function call graph for a codebase">
80+
<img src="/images/neo4j-function-calls.png" />
81+
</Frame>
82+
83+
### Call Graph
84+
85+
```cypher
86+
Match path = (:(Method|Func)) -[:CALLS*5..10]-> (:(Method|Func))
87+
Return path
88+
LIMIT 20
89+
```
90+
91+
<Frame caption="Call graph for a codebase">
92+
<img src="/images/neo4j-call-graph.png" />
93+
</Frame>

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ dependencies = [
6969
"langchain_openai",
7070
"numpy>=2.2.2",
7171
"mcp[cli]",
72+
"neo4j",
7273
]
7374

7475
license = { text = "Apache-2.0" }

src/codegen/extensions/graph/__init__.py

Whitespace-only changes.
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
from typing import Optional
2+
3+
from codegen.extensions.graph.utils import Node, NodeLabel, Relation, RelationLabel, SimpleGraph
4+
from codegen.sdk.code_generation.doc_utils.utils import safe_get_class
5+
from codegen.sdk.core.class_definition import Class
6+
from codegen.sdk.core.external_module import ExternalModule
7+
from codegen.sdk.core.function import Function
8+
from codegen.sdk.python.class_definition import PyClass
9+
10+
11+
def create_codebase_graph(codebase):
12+
"""Create a SimpleGraph representing the codebase structure."""
13+
# Initialize graph
14+
graph = SimpleGraph()
15+
16+
# Track existing nodes by name to prevent duplicates
17+
node_registry = {} # name -> node_id mapping
18+
19+
def get_or_create_node(name: str, label: NodeLabel, parent_name: Optional[str] = None, properties: dict | None = None):
20+
"""Get existing node or create new one if it doesn't exist."""
21+
full_name = f"{parent_name}.{name}" if parent_name and parent_name != "Class" else name
22+
if full_name in node_registry:
23+
return graph.nodes[node_registry[full_name]]
24+
25+
node = Node(name=name, full_name=full_name, label=label.value, properties=properties or {})
26+
node_registry[full_name] = node.id
27+
graph.add_node(node)
28+
return node
29+
30+
def create_class_node(class_def):
31+
"""Create a node for a class definition."""
32+
return get_or_create_node(
33+
name=class_def.name,
34+
label=NodeLabel.CLASS,
35+
properties={
36+
"filepath": class_def.filepath if hasattr(class_def, "filepath") else "",
37+
"source": class_def.source if hasattr(class_def, "source") else "",
38+
"type": "class",
39+
},
40+
)
41+
42+
def create_function_node(func):
43+
"""Create a node for a function/method."""
44+
class_name = None
45+
if func.is_method:
46+
class_name = func.parent_class.name
47+
48+
return get_or_create_node(
49+
name=func.name,
50+
label=NodeLabel.METHOD if class_name else NodeLabel.FUNCTION,
51+
parent_name=class_name,
52+
properties={
53+
"filepath": func.filepath if hasattr(func, "filepath") else "",
54+
"is_async": func.is_async if hasattr(func, "is_async") else False,
55+
"source": func.source if hasattr(func, "source") else "",
56+
"type": "method" if class_name else "function",
57+
},
58+
)
59+
60+
def create_function_call_node(func_call):
61+
"""Create a node for a function call."""
62+
func_def = func_call.function_definition
63+
if not func_def:
64+
return None
65+
if isinstance(func_def, ExternalModule):
66+
parent_class = safe_get_class(codebase, func_def.name)
67+
if parent_class and parent_class.get_method(func_call.name):
68+
return create_function_node(parent_class.get_method(func_call.name))
69+
else:
70+
return None
71+
72+
call_node = None
73+
if isinstance(func_def, Function):
74+
call_node = create_function_node(func_def)
75+
76+
elif isinstance(func_def, Class):
77+
call_node = create_class_node(func_def)
78+
79+
return call_node
80+
81+
# Process all classes
82+
for class_def in codebase.classes:
83+
class_node = create_class_node(class_def)
84+
85+
# Process methods
86+
methods = class_def.methods
87+
for method in methods:
88+
method_node = create_function_node(method)
89+
90+
# Add DEFINES relation
91+
defines_relation = Relation(
92+
label=RelationLabel.DEFINES.value, source_id=class_node.id, target_id=method_node.id, properties={"relationship_description": "The parent class defines the method."}
93+
)
94+
graph.add_relation(defines_relation)
95+
96+
for call in method.function_calls:
97+
call_node = create_function_call_node(call)
98+
if call_node and call_node != method_node:
99+
call_relation = Relation(
100+
label=RelationLabel.CALLS.value, source_id=method_node.id, target_id=call_node.id, properties={"relationship_description": f"The method calls the {call_node.label}."}
101+
)
102+
graph.add_relation(call_relation)
103+
104+
# Add inheritance relations
105+
if class_def.parent_classes:
106+
for parent in class_def.parent_classes:
107+
if not isinstance(parent, PyClass):
108+
try:
109+
parent = codebase.get_class(parent.name, optional=True)
110+
if not parent:
111+
continue
112+
except Exception as e:
113+
print(f"parent not found: {e}")
114+
continue
115+
if not hasattr(parent, "name"):
116+
continue
117+
parent_node = create_class_node(parent)
118+
119+
inherits_relation = Relation(
120+
label=RelationLabel.INHERITS_FROM.value,
121+
source_id=class_node.id,
122+
target_id=parent_node.id,
123+
properties={"relationship_description": "The child class inherits from the parent class."},
124+
)
125+
graph.add_relation(inherits_relation)
126+
127+
for func in codebase.functions:
128+
func_node = create_function_node(func)
129+
for call in func.function_calls:
130+
call_node = create_function_call_node(call)
131+
if call_node and call_node != func_node:
132+
call_relation = Relation(
133+
label=RelationLabel.CALLS.value, source_id=func_node.id, target_id=call_node.id, properties={"relationship_description": f"The function calls the {call_node.label}."}
134+
)
135+
graph.add_relation(call_relation)
136+
137+
return graph

src/codegen/extensions/graph/main.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
from codegen import Codebase
2+
from codegen.extensions.graph.create_graph import create_codebase_graph
3+
from codegen.extensions.graph.neo4j_exporter import Neo4jExporter
4+
from codegen.shared.enums.programming_language import ProgrammingLanguage
5+
6+
7+
def visualize_codebase(codebase, neo4j_uri: str, username: str, password: str):
    """Build the graph for a codebase and export it to Neo4j.

    After a successful export, a few starter Cypher queries are printed to
    stdout. The exporter is closed whether or not the export succeeds.

    Args:
        codebase: The codebase object to analyze
        neo4j_uri: URI for Neo4j database
        username: Neo4j username
        password: Neo4j password
    """
    # Lines reported after a successful export, in print order.
    success_report = (
        "Successfully exported graph to Neo4j",
        "\nUseful Cypher queries for visualization:",
        "\n1. View all nodes and relationships:",
        "MATCH (n)-[r]->(m) RETURN n, r, m",
        "\n2. View class hierarchy:",
        "MATCH (c:Class)-[r:INHERITS_FROM]->(parent:Class) RETURN c, r, parent",
        "\n3. View methods defined by each class:",
        "MATCH (c:Class)-[r:DEFINES]->(m:Method) RETURN c, r, m",
    )

    codebase_graph = create_codebase_graph(codebase)

    neo4j_exporter = Neo4jExporter(neo4j_uri, username, password)
    try:
        neo4j_exporter.export_graph(codebase_graph)
        for line in success_report:
            print(line)
    finally:
        neo4j_exporter.close()
38+
39+
40+
if __name__ == "__main__":
    # Script entry point: parse the tree two directories up as a Python
    # codebase and export it to a local Neo4j instance over Bolt.
    codebase = Codebase("../../", programming_language=ProgrammingLanguage.PYTHON)
    visualize_codebase(
        codebase,
        neo4j_uri="bolt://localhost:7687",
        username="neo4j",
        password="password",
    )
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
from neo4j import GraphDatabase
2+
3+
from codegen.extensions.graph.utils import SimpleGraph
4+
5+
6+
class Neo4jExporter:
    """Class to handle exporting the codebase graph to Neo4j.

    The instance owns a Neo4j driver. Release it with ``close()`` or by using
    the exporter as a context manager (``with Neo4jExporter(...) as exporter``).
    """

    def __init__(self, uri: str, username: str, password: str):
        """Initialize Neo4j connection.

        Args:
            uri: Connection URI handed to ``GraphDatabase.driver``.
            username: Neo4j username.
            password: Neo4j password.
        """
        self.driver = GraphDatabase.driver(uri, auth=(username, password))

    def __enter__(self) -> "Neo4jExporter":
        """Return the exporter itself so it can be bound in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the driver on leaving the ``with`` block; exceptions propagate."""
        self.close()

    def close(self):
        """Close the Neo4j connection."""
        self.driver.close()

    def clear_database(self):
        """Clear all nodes and relationships in the database."""
        with self.driver.session() as session:
            session.run("MATCH (n) DETACH DELETE n")

    @staticmethod
    def _to_storable(properties: dict) -> dict:
        """Return a copy of ``properties`` with dict and list values converted via ``str``."""
        return {key: str(value) if isinstance(value, (dict, list)) else value for key, value in properties.items()}

    def export_graph(self, graph: "SimpleGraph"):
        """Export the SimpleGraph to Neo4j.

        The database is cleared first, so its previous content is deleted.
        Nodes are created before relationships because each relationship is
        attached by matching its endpoints on label and ``full_name``.

        Args:
            graph: Graph whose ``nodes`` (id -> node) and ``relations`` are exported.
        """
        self.clear_database()

        with self.driver.session() as session:
            # Create nodes
            for node in graph.nodes.values():
                props = {"name": node.name, "full_name": node.full_name, **self._to_storable(node.properties)}
                # Labels cannot be Cypher parameters, so the label is interpolated.
                # Properties travel as a single map parameter: keys never become
                # part of the query text, so any key string is safe.
                session.run(f"CREATE (n:{node.label}) SET n = $props", {"props": props})

            # Create relationships
            for relation in graph.relations:
                source_node = graph.nodes[relation.source_id]
                target_node = graph.nodes[relation.target_id]

                query = (
                    f"MATCH (source:{source_node.label} {{full_name: $source_name}}), "
                    f"(target:{target_node.label} {{full_name: $target_name}}) "
                    f"CREATE (source)-[r:{relation.label}]->(target) "
                    "SET r = $props"
                )
                # Relation properties live under their own parameter so a property
                # called "source_name"/"target_name" cannot override the MATCH inputs.
                session.run(
                    query,
                    {
                        "source_name": source_node.full_name,
                        "target_name": target_node.full_name,
                        "props": self._to_storable(relation.properties),
                    },
                )

0 commit comments

Comments
 (0)