|
25 | 25 | from sagemaker.serve.model_server.djl_serving.utils import _get_admissible_tensor_parallel_degrees
|
26 | 26 | from sagemaker.serve.model_server.tgi.prepare import prepare_tgi_js_resources, _create_dir_structure
|
27 | 27 | from sagemaker.serve.mode.function_pointers import Mode
|
28 |
| -from sagemaker.serve.utils.exceptions import LocalDeepPingException, LocalModelOutOfMemoryException, \ |
29 |
| - LocalModelInvocationException, LocalModelLoadException, SkipTuningComboException |
| 28 | +from sagemaker.serve.utils.exceptions import ( |
| 29 | + LocalDeepPingException, |
| 30 | + LocalModelOutOfMemoryException, |
| 31 | + LocalModelInvocationException, |
| 32 | + LocalModelLoadException, |
| 33 | + SkipTuningComboException |
| 34 | +) |
30 | 35 | from sagemaker.serve.utils.predictors import (
|
31 | 36 | DjlLocalModePredictor,
|
32 | 37 | TgiLocalModePredictor,
|
33 | 38 | )
|
34 | 39 | from sagemaker.serve.utils.local_hardware import _get_nb_instance, _get_ram_usage_mb
|
35 | 40 | from sagemaker.serve.utils.telemetry_logger import _capture_telemetry
|
36 |
| -from sagemaker.serve.utils.tuning import _serial_benchmark, _concurrent_benchmark, _more_performant, \ |
37 |
| - _pretty_print_results_tgi, _pretty_print_results_tgi_js |
| 41 | +from sagemaker.serve.utils.tuning import ( |
| 42 | + _serial_benchmark, |
| 43 | + _concurrent_benchmark, |
| 44 | + _more_performant, |
| 45 | + _pretty_print_results_djl_js, |
| 46 | + _pretty_print_results_tgi_js |
| 47 | +) |
38 | 48 | from sagemaker.serve.utils.types import ModelServer
|
39 | 49 | from sagemaker.base_predictor import PredictorBase
|
40 | 50 | from sagemaker.jumpstart.model import JumpStartModel
|
@@ -375,7 +385,7 @@ def tune_for_djl_jumpstart(self, max_tuning_duration: int = 1800):
|
375 | 385 | "OPTION_TENSOR_PARALLEL_DEGREE": str(best_tuned_combination[1])
|
376 | 386 | })
|
377 | 387 |
|
378 |
| - _pretty_print_results_tgi(benchmark_results) |
| 388 | + _pretty_print_results_djl_js(benchmark_results) |
379 | 389 | logger.info(
|
380 | 390 | "Model Configuration: %s was most performant with avg latency: %s, "
|
381 | 391 | "p90 latency: %s, average tokens per second: %s, throughput/s: %s, "
|
|
0 commit comments