Skip to content

Commit d614571

Browse files
committed
Enhance model builder selection logic to include model size
1 parent 0900405 commit d614571

File tree

5 files changed

+74
-2
lines changed

5 files changed

+74
-2
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
accelerate==0.27.2
2+
huggingface-hub==0.20.3
3+
psutil==5.9.8

requirements/extras/test_requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,6 @@ tritonclient[http]<2.37.0
3939
onnx==1.14.1
4040
# tf2onnx==1.15.1
4141
nbformat>=5.9,<6
42+
accelerate==0.27.2
43+
huggingface-hub==0.20.3
44+
psutil==5.9.8

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ def read_requirements(filename):
7979
"feature-processor": read_requirements(
8080
"requirements/extras/feature-processor_requirements.txt"
8181
),
82+
"huggingface": read_requirements("requirements/extras/huggingface_requirements.txt"),
8283
}
8384
# Meta dependency groups
8485
extras["all"] = [item for group in extras.values() for item in group]

src/sagemaker/serve/builder/model_builder.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,11 @@
2020

2121
from pathlib import Path
2222

23+
import accelerate
24+
from accelerate.commands.estimate import estimate_command_parser, gather_data
25+
2326
from sagemaker import Session
27+
from sagemaker.djl_inference import defaults
2428
from sagemaker.model import Model
2529
from sagemaker.base_predictor import PredictorBase
2630
from sagemaker.serializers import NumpySerializer, TorchTensorSerializer
@@ -39,6 +43,7 @@
3943
from sagemaker.serve.save_retrive.version_1_0_0.metadata.metadata import Metadata
4044
from sagemaker.serve.spec.inference_spec import InferenceSpec
4145
from sagemaker.serve.utils.predictors import _get_local_mode_predictor
46+
from sagemaker.serve.utils.hardware_detector import _get_gpu_info, _get_gpu_info_fallback
4247
from sagemaker.serve.detector.image_detector import (
4348
auto_detect_container,
4449
_detect_framework_and_version,
@@ -65,6 +70,8 @@
6570
ModelServer.DJL_SERVING,
6671
}
6772

73+
MIB_CONVERSION_FACTOR = 0.00000095367431640625
74+
MEMORY_BUFFER_MULTIPLIER = 1.2 # 20% buffer
6875

6976
# pylint: disable=attribute-defined-outside-init
7077
@dataclass
@@ -616,6 +623,10 @@ def build(
616623
)
617624
if hf_model_md.get("pipeline_tag") == "text-generation": # pylint: disable=R1705
618625
return self._build_for_tgi()
626+
elif self.can_fit_on_single_gpu():
627+
return self._build_for_transformers()
628+
elif self.model in defaults.FASTER_TRANSFORMER_SUPPORTED_ARCHITECTURES:
629+
return self._build_for_djl()
619630
else:
620631
return self._build_for_transformers()
621632

@@ -672,3 +683,58 @@ def validate(self, model_dir: str) -> Type[bool]:
672683
"""
673684

674685
return get_metadata(model_dir)
686+
687+
def total_inference_model_size_mib(self):
    """Calculate the total model size in MiB via HF accelerate's estimator.

    Parses the model id with accelerate's ``estimate`` command parser,
    gathers the raw byte size, converts it to MiB, and applies a 20%
    inference-time buffer (per EleutherAI's guidance referenced in the
    original comment).

    Returns:
        float: Estimated total memory footprint of the model in MiB,
        including the buffer.

    Raises:
        ValueError: If the model id / dtype arguments cannot be parsed
        by accelerate's argument parser.
    """
    # Default dtype; callers may override through the "dtypes" env var.
    dtypes = "float32"
    if self.env_vars.get("dtypes"):
        dtypes = self.env_vars.get("dtypes")

    try:
        parser = estimate_command_parser()
        args = parser.parse_args([self.model, "--dtypes", dtypes])
    except ValueError:
        # Without parsed args we cannot estimate anything; re-raise
        # instead of falling through to gather_data with an undefined
        # `args` (which previously produced a confusing NameError).
        logger.error("Args specified incorrect for model %s", self.model)
        raise

    # Row columns: "dtype", "Largest Layer", "Total Size Bytes",
    # "Training using Adam" — index 2 is the raw size in bytes.
    output = gather_data(args)

    total_memory_size_mib = MEMORY_BUFFER_MULTIPLIER * output[0][2] * MIB_CONVERSION_FACTOR
    logger.info("Total memory size MIB: %s", total_memory_size_mib)
    return total_memory_size_mib
def can_fit_on_single_gpu(self):
    """Check whether the estimated model size fits on a single GPU.

    Looks up GPU count and total GPU memory for ``self.instance_type``
    (falling back to the static table when the live lookup raises
    ``ValueError``), derives the per-GPU memory in MiB, and compares it
    against the accelerate-based model size estimate.

    Returns:
        bool: True if the estimated inference model size is at most the
        memory of a single GPU on the instance; False otherwise, or when
        the single-GPU size cannot be determined.
    """
    try:
        gpu_info = _get_gpu_info(self.instance_type, self.sagemaker_session)
        logger.info("GPU info %s for instance %s", gpu_info, self.instance_type)
        # gpu_info is (gpu_count, total_gpu_memory_mib) — TODO confirm
        # against hardware_detector; original code divides the same way.
        single_gpu_size_mib = gpu_info[1] / gpu_info[0]
    except ValueError:
        gpu_fallback = _get_gpu_info_fallback(
            self.instance_type, self.sagemaker_session.boto_region_name
        )
        logger.info("GPU fallback picked up %s", gpu_fallback)
        single_gpu_size_mib = gpu_fallback[1] / gpu_fallback[0]

    if single_gpu_size_mib is None:
        logger.info("Unable to determine single GPU size for instance %s", self.instance_type)
        return False

    # Estimate once and reuse: the accelerate-backed estimation is
    # expensive, and the original code called it twice (compare + log).
    total_size_mib = self.total_inference_model_size_mib()
    if total_size_mib <= single_gpu_size_mib:
        logger.info(
            "Total inference model size MIB %s, single GPU size for instance MIB %s",
            total_size_mib,
            single_gpu_size_mib,
        )
        return True
    return False

tests/integ/sagemaker/serve/test_model_builder_gpu.py renamed to tests/integ/sagemaker/serve/test_serve_model_builder_gpu.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,8 @@
1313
from __future__ import absolute_import
1414

1515
import pytest
16-
from sagemaker.serve import Mode
17-
from sagemaker.serve.builder.model_builder import ModelBuilder
1816
from sagemaker.serve.builder.schema_builder import SchemaBuilder
17+
from sagemaker.serve.builder.model_builder import ModelBuilder, Mode
1918
from tests.integ.sagemaker.serve.constants import (
2019
HF_DIR,
2120
PYTHON_VERSION_IS_NOT_310,

0 commit comments

Comments
 (0)