@@ -254,6 +254,12 @@ def _build_for_tgi_jumpstart(self):
 
         self.pysdk_model.env.update(env)
 
+    def _log_delete_me(self, data: any):
+        """Placeholder docstring"""
+        logger.debug("*****************************************")
+        logger.debug(data)
+        logger.debug("*****************************************")
+
     @_capture_telemetry("djl_jumpstart.tune")
     def tune_for_djl_jumpstart(self, max_tuning_duration: int = 1800):
         """pass"""
@@ -264,18 +270,21 @@ def tune_for_djl_jumpstart(self, max_tuning_duration: int = 1800):
             return self.pysdk_model
 
         initial_model_configuration = copy.deepcopy(self.pysdk_model.env)
+        self._log_delete_me(f"initial_model_configuration: {initial_model_configuration}")
 
+        self._log_delete_me(f"self.js_model_config: {self.js_model_config}")
         admissible_tensor_parallel_degrees = _get_admissible_tensor_parallel_degrees(self.js_model_config)
+        self._log_delete_me(f"admissible_tensor_parallel_degrees: {admissible_tensor_parallel_degrees}")
 
         benchmark_results = {}
         best_tuned_combination = None
         timeout = datetime.now() + timedelta(seconds=max_tuning_duration)
         for tensor_parallel_degree in admissible_tensor_parallel_degrees:
+            self._log_delete_me(f"tensor_parallel_degree: {tensor_parallel_degree}")
             if datetime.now() > timeout:
                 logger.info("Max tuning duration reached. Tuning stopped.")
                 break
 
-            sagemaker_model_server_workers = None
             self.pysdk_model.env.update({
                 "OPTION_TENSOR_PARALLEL_DEGREE": str(tensor_parallel_degree)
             })
@@ -311,7 +320,7 @@ def tune_for_djl_jumpstart(self, max_tuning_duration: int = 1800):
                     best_tuned_combination = [
                         avg_latency,
                         tensor_parallel_degree,
-                        sagemaker_model_server_workers,
+                        None,
                         p90,
                         avg_tokens_per_second,
                         throughput_per_second,
@@ -321,7 +330,7 @@ def tune_for_djl_jumpstart(self, max_tuning_duration: int = 1800):
                 tuned_configuration = [
                     avg_latency,
                     tensor_parallel_degree,
-                    sagemaker_model_server_workers,
+                    None,
                     p90,
                     avg_tokens_per_second,
                     throughput_per_second,
@@ -418,19 +427,23 @@ def tune_for_tgi_jumpstart(self, max_tuning_duration: int = 1800):
             return self.pysdk_model
 
         initial_model_configuration = copy.deepcopy(self.pysdk_model.env)
+        self._log_delete_me(f"initial_model_configuration: {initial_model_configuration}")
+
+        self._log_delete_me(f"self.js_model_config: {self.js_model_config}")
 
         admissible_tensor_parallel_degrees = _get_admissible_tensor_parallel_degrees(self.js_model_config)
+        self._log_delete_me(f"admissible_tensor_parallel_degrees: {admissible_tensor_parallel_degrees}")
 
         benchmark_results = {}
         best_tuned_combination = None
         timeout = datetime.now() + timedelta(seconds=max_tuning_duration)
         for tensor_parallel_degree in admissible_tensor_parallel_degrees:
+            self._log_delete_me(f"tensor_parallel_degree: {tensor_parallel_degree}")
             if datetime.now() > timeout:
                 logger.info("Max tuning duration reached. Tuning stopped.")
                 break
 
             sm_num_gpus = tensor_parallel_degree
-            sagemaker_model_server_workers = None
             self.pysdk_model.env.update({
                 "SM_NUM_GPUS": str(sm_num_gpus)
             })
@@ -466,7 +479,7 @@ def tune_for_tgi_jumpstart(self, max_tuning_duration: int = 1800):
                     best_tuned_combination = [
                         avg_latency,
                         sm_num_gpus,
-                        sagemaker_model_server_workers,
+                        None,
                         p90,
                         avg_tokens_per_second,
                         throughput_per_second,
@@ -476,7 +489,7 @@ def tune_for_tgi_jumpstart(self, max_tuning_duration: int = 1800):
                 tuned_configuration = [
                     avg_latency,
                     sm_num_gpus,
-                    sagemaker_model_server_workers,
+                    None,
                     p90,
                     avg_tokens_per_second,
                     throughput_per_second,
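
Taken together, the additions amount to a throwaway tracing helper plus a wall-clock budget on the tuning loop. Below is a minimal, self-contained sketch of that pattern, assuming a standard `logging` setup; `_log_delete_me`, `max_tuning_duration`, and the loop variable mirror the diff, while `candidate_degrees` and the stubbed benchmark step are hypothetical stand-ins for the surrounding class.

import logging
from datetime import datetime, timedelta
from typing import Any, List

logger = logging.getLogger(__name__)


def _log_delete_me(data: Any) -> None:
    """Temporary debug tracing, as added in the diff; meant to be deleted before merge."""
    logger.debug("*****************************************")
    logger.debug(data)
    logger.debug("*****************************************")


def tune(candidate_degrees: List[int], max_tuning_duration: int = 1800) -> None:
    """Benchmark candidate tensor-parallel degrees under a wall-clock budget."""
    # The deadline is fixed once up front and checked before each candidate,
    # so no new benchmark starts after the budget is spent (an in-flight
    # benchmark may still overshoot it).
    timeout = datetime.now() + timedelta(seconds=max_tuning_duration)
    for tensor_parallel_degree in candidate_degrees:
        _log_delete_me(f"tensor_parallel_degree: {tensor_parallel_degree}")
        if datetime.now() > timeout:
            logger.info("Max tuning duration reached. Tuning stopped.")
            break
        # ... benchmark this degree and record latency/throughput ...


tune([1, 2, 4, 8])  # hypothetical usage; real degrees come from the model config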