Commit fe43451

Enhance model builder selection logic to include model size
1 parent 0900405 commit fe43451

5 files changed: 80 additions, 2 deletions
requirements/extras/huggingface_requirements.txt (new file)

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+accelerate
+numpy>=1.17
+packaging>=20.0
+psutil
+pyyaml
+torch>=1.10.0
+huggingface_hub

requirements/extras/test_requirements.txt

Lines changed: 7 additions & 0 deletions
@@ -39,3 +39,10 @@ tritonclient[http]<2.37.0
 onnx==1.14.1
 # tf2onnx==1.15.1
 nbformat>=5.9,<6
+accelerate
+numpy>=1.17
+packaging>=20.0
+psutil
+pyyaml
+torch>=1.10.0
+huggingface_hub

setup.py

Lines changed: 1 addition & 0 deletions
@@ -79,6 +79,7 @@ def read_requirements(filename):
     "feature-processor": read_requirements(
         "requirements/extras/feature-processor_requirements.txt"
     ),
+    "huggingface": read_requirements("requirements/extras/huggingface_requirements.txt"),
 }
 # Meta dependency groups
 extras["all"] = [item for group in extras.values() for item in group]
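With the new extras group in place, these dependencies should be installable on their own via pip's extras syntax, for example: pip install "sagemaker[huggingface]" (the extra name matches the key added above; the packages are also picked up by the existing "all" meta-group built in the line that follows).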

src/sagemaker/serve/builder/model_builder.py

Lines changed: 64 additions & 0 deletions
@@ -20,7 +20,9 @@
 
 from pathlib import Path
 
+from accelerate.commands.estimate import estimate_command_parser, gather_data
 from sagemaker import Session
+from sagemaker.djl_inference import defaults
 from sagemaker.model import Model
 from sagemaker.base_predictor import PredictorBase
 from sagemaker.serializers import NumpySerializer, TorchTensorSerializer
@@ -39,6 +41,7 @@
 from sagemaker.serve.save_retrive.version_1_0_0.metadata.metadata import Metadata
 from sagemaker.serve.spec.inference_spec import InferenceSpec
 from sagemaker.serve.utils.predictors import _get_local_mode_predictor
+from sagemaker.serve.utils.hardware_detector import _get_gpu_info, _get_gpu_info_fallback
 from sagemaker.serve.detector.image_detector import (
     auto_detect_container,
     _detect_framework_and_version,
@@ -65,6 +68,8 @@
     ModelServer.DJL_SERVING,
 }
 
+MIB_CONVERSION_FACTOR = 0.00000095367431640625
+MEMORY_BUFFER_MULTIPLIER = 1.2  # 20% buffer
 
 # pylint: disable=attribute-defined-outside-init
 @dataclass
@@ -616,6 +621,10 @@ def build(
             )
             if hf_model_md.get("pipeline_tag") == "text-generation":  # pylint: disable=R1705
                 return self._build_for_tgi()
+            elif self.can_fit_on_single_gpu():
+                return self._build_for_transformers()
+            elif self.model in defaults.FASTER_TRANSFORMER_SUPPORTED_ARCHITECTURES:
+                return self._build_for_djl()
             else:
                 return self._build_for_transformers()
@@ -672,3 +681,58 @@ def validate(self, model_dir: str) -> Type[bool]:
         """
 
         return get_metadata(model_dir)
+
+    def total_inference_model_size_mib(self):
+        """Calculates the model size from HF accelerate
+
+        This function gets the model size from accelerate, adds a padding
+        buffer, and converts the size to MiB. When performing inference,
+        expect to add up to an additional 20% to the given model size,
+        as found by EleutherAI.
+        """
+        dtypes = "float32"
+        try:
+            if self.env_vars.get("dtypes"):
+                dtypes = self.env_vars.get("dtypes")
+
+            parser = estimate_command_parser()
+            args = parser.parse_args([self.model, "--dtypes", dtypes])
+        except ValueError:
+            logging.error("Args specified incorrectly for model %s", self.model)
+
+        output = gather_data(
+            args
+        )  # "dtype", "Largest Layer", "Total Size Bytes", "Training using Adam"
+
+        total_memory_size_mib = MEMORY_BUFFER_MULTIPLIER * output[0][2] * MIB_CONVERSION_FACTOR
+        logger.info("Total memory size MIB: %s", total_memory_size_mib)
+        return total_memory_size_mib
+
+    def can_fit_on_single_gpu(self):
+        """Check whether the model can fit on a single GPU
+
+        This function gets the GPU info, or uses the fallback, to determine the
+        memory size of a single GPU. If the size of the model is <= the
+        single-GPU memory size, it returns True.
+        """
+        try:
+            gpu_info = _get_gpu_info(self.instance_type, self.sagemaker_session)
+            logger.info("GPU info %s for instance %s", gpu_info, self.instance_type)
+            single_gpu_size_mib = gpu_info[1] / gpu_info[0]
+        except ValueError:
+            gpu_fallback = _get_gpu_info_fallback(
+                self.instance_type, self.sagemaker_session.boto_region_name
+            )
+            logger.info("GPU fallback picked up %s", gpu_fallback)
+            single_gpu_size_mib = gpu_fallback[1] / gpu_fallback[0]
+
+        if single_gpu_size_mib is None:
+            logger.info("Unable to determine single GPU size for instance %s", self.instance_type)
+            return False
+
+        if self.total_inference_model_size_mib() <= single_gpu_size_mib:
+            logger.info(
+                "Total inference model size MIB %s, single GPU size for instance MIB %s",
+                self.total_inference_model_size_mib(),
+                single_gpu_size_mib,
+            )
+            return True
+        return False
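For reference, MIB_CONVERSION_FACTOR is exactly 1/2^20 (one MiB is 1,048,576 bytes), so the helper converts accelerate's byte count to MiB and then applies the 20% buffer. As a worked example, a 7-billion-parameter model held in float32 weighs roughly 28e9 bytes: 28e9 / 2^20 ≈ 26,703 MiB, and multiplying by 1.2 brings the estimate to about 32,044 MiB. Below is a minimal standalone sketch of the same computation using the accelerate estimate API the commit imports; the model id "gpt2" is only an illustrative placeholder.

from accelerate.commands.estimate import estimate_command_parser, gather_data

MIB_CONVERSION_FACTOR = 1 / 2**20  # bytes -> MiB, == 0.00000095367431640625
MEMORY_BUFFER_MULTIPLIER = 1.2     # 20% inference head-room

# Build the same CLI-style args the new helper builds ("gpt2" is a placeholder).
parser = estimate_command_parser()
args = parser.parse_args(["gpt2", "--dtypes", "float32"])

# Each result row holds: dtype, largest layer, total size in bytes, training size using Adam.
output = gather_data(args)
total_size_bytes = output[0][2]

total_mib = MEMORY_BUFFER_MULTIPLIER * total_size_bytes * MIB_CONVERSION_FACTOR
print(f"Estimated inference footprint: {total_mib:.0f} MiB")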

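The single-GPU check then divides the instance's total accelerator memory by its GPU count. Here is a small sketch of that comparison, assuming the hardware detector returns a (gpu_count, total_memory_mib) style tuple, as the gpu_info[1] / gpu_info[0] indexing implies; the four-GPU, 24 GiB-per-GPU figures are illustrative only, not queried from SageMaker.

# Hypothetical hardware-detector output: (gpu_count, total_memory_mib).
gpu_count, total_memory_mib = 4, 4 * 24 * 1024

single_gpu_size_mib = total_memory_mib / gpu_count  # 24576.0 MiB per GPU

estimated_model_mib = 32_044  # e.g. the 7B float32 estimate from the sketch above

# Mirrors the comparison in can_fit_on_single_gpu: True routes build() to
# _build_for_transformers, False falls through to the DJL architecture check.
print(estimated_model_mib <= single_gpu_size_mib)  # False for this example
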
tests/integ/sagemaker/serve/test_model_builder_gpu.py renamed to tests/integ/sagemaker/serve/test_serve_model_builder_gpu.py

Lines changed: 1 addition & 2 deletions
@@ -13,9 +13,8 @@
 from __future__ import absolute_import
 
 import pytest
-from sagemaker.serve import Mode
-from sagemaker.serve.builder.model_builder import ModelBuilder
 from sagemaker.serve.builder.schema_builder import SchemaBuilder
+from sagemaker.serve.builder.model_builder import ModelBuilder, Mode
 from tests.integ.sagemaker.serve.constants import (
     HF_DIR,
     PYTHON_VERSION_IS_NOT_310,
