
Fix: Accelerate packaging in ModelBuilder #4549


Merged (8 commits, Mar 29, 2024)
4 changes: 2 additions & 2 deletions src/sagemaker/serve/detector/dependency_manager.py
@@ -54,9 +54,9 @@ def capture_dependencies(dependencies: dict, work_dir: Path, capture_all: bool =

     with open(path, "r") as f:
         autodetect_depedencies = f.read().splitlines()
-        autodetect_depedencies.append("sagemaker>=2.199")
+        autodetect_depedencies.append("sagemaker[huggingface]>=2.199")
 else:
-    autodetect_depedencies = ["sagemaker>=2.199"]
+    autodetect_depedencies = ["sagemaker[huggingface]>=2.199"]

 module_version_dict = _parse_dependency_list(autodetect_depedencies)

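The requirement pinned into the captured dependency list now carries the [huggingface] extra, so accelerate (which hardware_detector needs for model size estimation, see below) is installed together with the packaged dependencies rather than assumed to be present. As a rough sketch of what the extra buys at runtime (the helper below is illustrative, not part of the PR), the presence of accelerate can be probed before attempting an estimate:

    import importlib.util

    def huggingface_extras_available() -> bool:
        # `pip install 'sagemaker[huggingface]'` pulls in accelerate;
        # a plain `pip install sagemaker` does not.
        return importlib.util.find_spec("accelerate") is not None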
23 changes: 15 additions & 8 deletions src/sagemaker/serve/utils/hardware_detector.py
@@ -18,9 +18,7 @@

 from botocore.exceptions import ClientError

-from accelerate.commands.estimate import estimate_command_parser, gather_data
 from sagemaker import Session
-from sagemaker.model import Model
 from sagemaker import instance_types_gpu_info

logger = logging.getLogger(__name__)
@@ -116,18 +114,27 @@ def _format_instance_type(instance_type: str) -> str:
     return ec2_instance


-def _total_inference_model_size_mib(model: Model, dtype: str) -> int:
+def _total_inference_model_size_mib(model: str, dtype: str) -> int:
     """Calculates the model size from HF accelerate

     This function gets the model size from accelerate. It also adds a
     padding and converts to size MiB. When performing inference, expect
     to add up to an additional 20% to the given model size as found by EleutherAI.
     """
-    args = estimate_command_parser().parse_args([model, "--dtypes", dtype])
-
-    output = gather_data(
-        args
-    )  # "dtype", "Largest Layer", "Total Size Bytes", "Training using Adam"
+    output = None
+    try:
+        from accelerate.commands.estimate import estimate_command_parser, gather_data
+
+        args = estimate_command_parser().parse_args([model, "--dtypes", dtype])
+
+        output = gather_data(
+            args
+        )  # "dtype", "Largest Layer", "Total Size Bytes", "Training using Adam"
+    except ImportError:
+        logger.error(
+            "To enable Model size calculations: Install HuggingFace extras dependencies "
+            "using pip install 'sagemaker[huggingface]>=2.212.0'"
+        )

     if output is None:
         raise ValueError(f"Could not get Model size for {model}")
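With the accelerate import deferred into the function body, a missing dependency no longer breaks importing hardware_detector itself; callers instead get the same ValueError as for any failed estimate. A minimal usage sketch (assuming sagemaker with the huggingface extras is installed; the model id is an arbitrary example):

    from sagemaker.serve.utils import hardware_detector

    try:
        size_mib = hardware_detector._total_inference_model_size_mib(
            "bert-base-uncased", "float32"
        )
        print(f"Estimated inference footprint: {size_mib} MiB")
    except ValueError as err:
        # Raised when accelerate is not installed (output stays None) or when
        # gather_data cannot produce an estimate for the given model id.
        print(err)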
5 changes: 2 additions & 3 deletions tests/integ/sagemaker/serve/test_serve_pt_happy.py
@@ -10,6 +10,7 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
+# flake8: noqa: F631
 from __future__ import absolute_import

 import pytest
@@ -221,10 +222,8 @@ def test_happy_pytorch_sagemaker_endpoint(
     )
     if caught_ex:
         logger.exception(caught_ex)
-        ignore_if_worker_dies = "Worker died." in str(caught_ex)
-        # https://github.com/pytorch/serve/issues/3032
         assert (
-            ignore_if_worker_dies
+            False,
         ), f"{caught_ex} was thrown when running pytorch squeezenet sagemaker endpoint test"


@@ -99,7 +99,7 @@ def test_capture_dependencies(self, mock_subprocess, mock_file, mock_path):
call("custom_module==1.2.3\n"),
call("numpy==4.5\n"),
call("boto3=1.28.*\n"),
call("sagemaker>=2.199\n"),
call("sagemaker[huggingface]>=2.199\n"),
call("other_module@http://some/website.whl\n"),
]
mocked_writes.assert_has_calls(expected_calls)
8 changes: 6 additions & 2 deletions tests/unit/sagemaker/serve/utils/test_hardware_detector.py
@@ -101,8 +101,8 @@ def test_format_instance_type_without_ml_success():
     assert formatted_instance_type == "g5.48xlarge"


-@patch("sagemaker.serve.utils.hardware_detector.estimate_command_parser")
-@patch("sagemaker.serve.utils.hardware_detector.gather_data")
+@patch("accelerate.commands.estimate.estimate_command_parser")
+@patch("accelerate.commands.estimate.gather_data")
 def test_total_inference_model_size_mib(
     mock_gather_data,
     mock_parser,
@@ -120,3 +120,7 @@ def test_total_inference_model_size_mib(

     with pytest.raises(ValueError):
         hardware_detector._total_inference_model_size_mib("stable-diffusion", "float32")
+
+    mock_parser.side_effect = ImportError
+    with pytest.raises(ValueError):
+        hardware_detector._total_inference_model_size_mib("stable-diffusion", "float32")
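The patch targets move because the accelerate import now lives inside `_total_inference_model_size_mib`: there is no longer a module-level `estimate_command_parser` or `gather_data` attribute on `hardware_detector` to patch, so the mocks must target the defining module instead. A minimal sketch of the pattern (illustrative; requires accelerate to be installed so the local import succeeds):

    from unittest.mock import patch

    from sagemaker.serve.utils import hardware_detector

    # Patching the attribute where it is defined also covers an import that
    # happens inside the function body, because the local
    # `from accelerate.commands.estimate import ...` resolves at call time.
    with patch("accelerate.commands.estimate.gather_data", return_value=None):
        try:
            hardware_detector._total_inference_model_size_mib("any-model", "float32")
        except ValueError:
            pass  # gather_data returned None, so the function raises as expected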