
Commit 1ec29e3

Author: Jonathan Makunga (committed)

Commit message: Debugging

1 parent 4d06e66 · commit 1ec29e3

File tree

1 file changed, 19 insertions(+), 6 deletions(-)

src/sagemaker/serve/builder/jumpstart_builder.py

Lines changed: 19 additions & 6 deletions
@@ -254,6 +254,12 @@ def _build_for_tgi_jumpstart(self):
 
         self.pysdk_model.env.update(env)
 
+    def _log_delete_me(self, data: any):
+        """Placeholder docstring"""
+        logger.debug("*****************************************")
+        logger.debug(data)
+        logger.debug("*****************************************")
+
     @_capture_telemetry("djl_jumpstart.tune")
     def tune_for_djl_jumpstart(self, max_tuning_duration: int = 1800):
         """pass"""
@@ -264,18 +270,21 @@ def tune_for_djl_jumpstart(self, max_tuning_duration: int = 1800):
             return self.pysdk_model
 
         initial_model_configuration = copy.deepcopy(self.pysdk_model.env)
+        self._log_delete_me(f"initial_model_configuration: {initial_model_configuration}")
 
+        self._log_delete_me(f"self.js_model_config: {self.js_model_config}")
         admissible_tensor_parallel_degrees = _get_admissible_tensor_parallel_degrees(self.js_model_config)
+        self._log_delete_me(f"admissible_tensor_parallel_degrees: {admissible_tensor_parallel_degrees}")
 
         benchmark_results = {}
         best_tuned_combination = None
         timeout = datetime.now() + timedelta(seconds=max_tuning_duration)
         for tensor_parallel_degree in admissible_tensor_parallel_degrees:
+            self._log_delete_me(f"tensor_parallel_degree: {tensor_parallel_degree}")
             if datetime.now() > timeout:
                 logger.info("Max tuning duration reached. Tuning stopped.")
                 break
 
-            sagemaker_model_server_workers = None
             self.pysdk_model.env.update({
                 "OPTION_TENSOR_PARALLEL_DEGREE": str(tensor_parallel_degree)
             })
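
The instrumented loop above is a wall-clock-bounded sweep: the deadline is fixed once before iteration, and each candidate degree is benchmarked only while budget remains. A standalone sketch of the same pattern (bounded_sweep and run_benchmark are hypothetical names, not from this file):

from datetime import datetime, timedelta

def bounded_sweep(candidates, run_benchmark, max_duration=1800):
    """Benchmark candidates until the wall-clock budget runs out."""
    deadline = datetime.now() + timedelta(seconds=max_duration)
    results = {}
    for candidate in candidates:
        if datetime.now() > deadline:
            break  # mirrors "Max tuning duration reached. Tuning stopped."
        results[candidate] = run_benchmark(candidate)
    return results
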
@@ -311,7 +320,7 @@ def tune_for_djl_jumpstart(self, max_tuning_duration: int = 1800):
                     best_tuned_combination = [
                         avg_latency,
                         tensor_parallel_degree,
-                        sagemaker_model_server_workers,
+                        None,
                         p90,
                         avg_tokens_per_second,
                         throughput_per_second,
@@ -321,7 +330,7 @@ def tune_for_djl_jumpstart(self, max_tuning_duration: int = 1800):
                 tuned_configuration = [
                     avg_latency,
                     tensor_parallel_degree,
-                    sagemaker_model_server_workers,
+                    None,
                     p90,
                     avg_tokens_per_second,
                     throughput_per_second,
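
Both hunks above swap sagemaker_model_server_workers for a literal None rather than dropping the element, so consumers that unpack the result list by position stay aligned. A sketch of that positional contract (the unpacking below is illustrative, not code from this file):

# Removing the third element outright would shift p90 and the
# throughput figures into the wrong slots.
(avg_latency, tensor_parallel_degree, model_server_workers,
 p90, avg_tokens_per_second, throughput_per_second) = tuned_configuration[:6]
assert model_server_workers is None  # placeholder kept for alignment
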
@@ -418,19 +427,23 @@ def tune_for_tgi_jumpstart(self, max_tuning_duration: int = 1800):
             return self.pysdk_model
 
         initial_model_configuration = copy.deepcopy(self.pysdk_model.env)
+        self._log_delete_me(f"initial_model_configuration: {initial_model_configuration}")
+
+        self._log_delete_me(f"self.js_model_config: {self.js_model_config}")
 
         admissible_tensor_parallel_degrees = _get_admissible_tensor_parallel_degrees(self.js_model_config)
+        self._log_delete_me(f"admissible_tensor_parallel_degrees: {admissible_tensor_parallel_degrees}")
 
         benchmark_results = {}
         best_tuned_combination = None
         timeout = datetime.now() + timedelta(seconds=max_tuning_duration)
         for tensor_parallel_degree in admissible_tensor_parallel_degrees:
+            self._log_delete_me(f"tensor_parallel_degree: {tensor_parallel_degree}")
             if datetime.now() > timeout:
                 logger.info("Max tuning duration reached. Tuning stopped.")
                 break
 
             sm_num_gpus = tensor_parallel_degree
-            sagemaker_model_server_workers = None
             self.pysdk_model.env.update({
                 "SM_NUM_GPUS": str(sm_num_gpus)
             })
@@ -466,7 +479,7 @@ def tune_for_tgi_jumpstart(self, max_tuning_duration: int = 1800):
                     best_tuned_combination = [
                         avg_latency,
                         sm_num_gpus,
-                        sagemaker_model_server_workers,
+                        None,
                         p90,
                         avg_tokens_per_second,
                         throughput_per_second,
@@ -476,7 +489,7 @@ def tune_for_tgi_jumpstart(self, max_tuning_duration: int = 1800):
                 tuned_configuration = [
                     avg_latency,
                     sm_num_gpus,
-                    sagemaker_model_server_workers,
+                    None,
                     p90,
                     avg_tokens_per_second,
                     throughput_per_second,
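
For context, a hedged usage sketch of how these tuners are typically reached through the SDK's ModelBuilder local-mode flow (the JumpStart model id is illustrative, and exposing tune() on the built model is assumed from the documented workflow):

from sagemaker.serve.builder.model_builder import ModelBuilder
from sagemaker.serve.mode.function_pointers import Mode

# Build a JumpStart model for a local container, then run the bounded
# sweep; tune() dispatches to tune_for_djl_jumpstart or
# tune_for_tgi_jumpstart depending on the resolved serving stack.
builder = ModelBuilder(model="huggingface-llm-falcon-7b-bf16", mode=Mode.LOCAL_CONTAINER)
model = builder.build()
tuned_model = model.tune(max_tuning_duration=1800)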
