Skip to content

Commit f42578c

Browse files
AviAvni and swilly22 authored
move to async batches (#75)
* move to async batches
* fix build
* fix tests
* fix perf
* cap async tasks
* pull 1 task at time
* Update query_buffer.py
* Update query_buffer.py

Co-authored-by: Roi Lipman <[email protected]>
1 parent 4c7f57a commit f42578c

File tree

5 files changed

+33
-7
lines changed

5 files changed

+33
-7
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ vulture = "^2.3"
4141
pytest = "^6.2.4"
4242
pytest-cov = "^2.12.1"
4343
redisgraph = "^2.4.0"
44+
pathos = "^0.2.8"
4445

4546
[build-system]
4647
requires = ["poetry-core>=1.0.0"]

redisgraph_bulk_loader/bulk_insert.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ def bulk_insert(graph, host, port, password, user, unix_socket_path, ssl_keyfile
145145

146146
# Send all remaining tokens to Redis
147147
query_buf.send_buffer()
148+
query_buf.wait_pool()
148149

149150
end_time = timer()
150151
query_buf.report_completion(end_time - start_time)

redisgraph_bulk_loader/label.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def update_node_dictionary(self, identifier):
5656

5757
def process_entities(self):
5858
entities_created = 0
59-
with click.progressbar(self.reader, length=self.entities_count, label=self.entity_str) as reader:
59+
with click.progressbar(self.reader, length=self.entities_count, label=self.entity_str, update_min_steps=100) as reader:
6060
for row in reader:
6161
self.validate_row(row)
6262

redisgraph_bulk_loader/query_buffer.py

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
from pathos.pools import ThreadPool as Pool
2+
3+
def run(client, graphname, args):
4+
result = client.execute_command("GRAPH.BULK", graphname, *args)
5+
stats = result.split(', '.encode())
6+
return stats
7+
18
class QueryBuffer:
29
def __init__(self, graphname, client, config):
310
self.nodes = None
@@ -30,7 +37,9 @@ def __init__(self, graphname, client, config):
3037
self.nodes_created = 0 # Total number of nodes created
3138
self.relations_created = 0 # Total number of relations created
3239

33-
# TODO consider using a queue to send commands asynchronously
40+
self.pool = Pool(nodes=1)
41+
self.tasks = []
42+
3443
def send_buffer(self):
3544
"""Send all pending inserts to Redis"""
3645
# Do nothing if we have no entities
@@ -43,10 +52,8 @@ def send_buffer(self):
4352
args.insert(0, "BEGIN")
4453
self.initial_query = False
4554

46-
result = self.client.execute_command("GRAPH.BULK", self.graphname, *args)
47-
stats = result.split(', '.encode())
48-
self.nodes_created += int(stats[0].split(' '.encode())[0])
49-
self.relations_created += int(stats[1].split(' '.encode())[0])
55+
task = self.pool.apipe(run, self.client, self.graphname, args)
56+
self.add_task(task)
5057

5158
self.clear_buffer()
5259

@@ -59,6 +66,23 @@ def clear_buffer(self):
5966
self.buffer_size = 0
6067
self.node_count = 0
6168
self.relation_count = 0
69+
70+
def add_task(self, task):
71+
self.tasks.append(task)
72+
if len(self.tasks) == 5:
73+
task = self.tasks.pop(0)
74+
stats = task.get()
75+
self.update_stats(stats)
76+
77+
def wait_pool(self):
78+
for task in self.tasks:
79+
stats = task.get()
80+
self.update_stats(stats)
81+
self.tasks.clear()
82+
83+
def update_stats(self, stats):
84+
self.nodes_created += int(stats[0].split(' '.encode())[0])
85+
self.relations_created += int(stats[1].split(' '.encode())[0])
6286

6387
def report_completion(self, runtime):
6488
print("Construction of graph '%s' complete: %d nodes created, %d relations created in %f seconds"

redisgraph_bulk_loader/relation_type.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def post_process_header_with_schema(self, header):
4747

4848
def process_entities(self):
4949
entities_created = 0
50-
with click.progressbar(self.reader, length=self.entities_count, label=self.entity_str) as reader:
50+
with click.progressbar(self.reader, length=self.entities_count, label=self.entity_str, update_min_steps=100) as reader:
5151
for row in reader:
5252
self.validate_row(row)
5353
try:

0 commit comments

Comments
 (0)