Skip to content

Commit a345a1d

Browse files
kthui and nnshah1 authored
refactor: Remove explicit callings to garbage collect (#55)
Co-authored-by: Neelay Shah <[email protected]>
1 parent 128abc3 commit a345a1d

File tree

1 file changed

+6
-7
lines changed

1 file changed

+6
-7
lines changed

src/model.py

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -288,7 +288,6 @@ def response_loop(self):
288288
if item is None:
289289
break
290290
response_sender, response, response_flag = item
291-
del item
292291
try:
293292
response_sender.send(response, response_flag)
294293
except Exception as e:
@@ -298,9 +297,6 @@ def response_loop(self):
298297
finally:
299298
if response_flag == pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL:
300299
self.ongoing_request_count -= 1
301-
del response_sender
302-
if self.ongoing_request_count == 0:
303-
gc.collect()
304300

305301
def create_response(self, vllm_output, prepend_input):
306302
"""
@@ -447,9 +443,6 @@ async def generate(self, request):
447443
finally:
448444
if decrement_ongoing_request_count:
449445
self.ongoing_request_count -= 1
450-
del response_sender
451-
if self.ongoing_request_count == 0:
452-
gc.collect()
453446

454447
def verify_loras(self, request):
455448
# We will check if the requested lora exists here, if not we will send a
@@ -527,3 +520,9 @@ def finalize(self):
527520
if self._response_thread is not None:
528521
self._response_thread.join()
529522
self._response_thread = None
523+
524+
# When using parallel tensors, the stub process may not shut down due to
525+
# unreleased references, so manually run the garbage collector once.
526+
self.logger.log_info("[vllm] Running Garbage Collector on finalize...")
527+
gc.collect()
528+
self.logger.log_info("[vllm] Garbage Collector on finalize... done")

0 commit comments

Comments
 (0)