
Commit f51f188

Merge branch 'master' into dependabot/pip/requirements/extras/black-24.3.0
2 parents a9ca6a0 + 58717d2


9 files changed: +38 -49 lines changed


CHANGELOG.md

Lines changed: 11 additions & 0 deletions
@@ -1,5 +1,16 @@
 # Changelog
 
+## v2.214.2 (2024-04-01)
+
+### Bug Fixes and Other Changes
+
+ * Skip JS Tune integration test
+ * bump apache-airflow version to 2.8.3
+ * bump onnx version to >=1.15.0
+ * Updates for DJL 0.27.0 release
+ * Tune (local mode) support for Jumpstart Models
+ * attach jumpstart estimator for gated model
+
 ## v2.214.1 (2024-03-27)
 
 ### Bug Fixes and Other Changes

VERSION

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-2.214.2.dev0
+2.214.3.dev0

src/sagemaker/serve/detector/dependency_manager.py

Lines changed: 2 additions & 2 deletions
@@ -54,9 +54,9 @@ def capture_dependencies(dependencies: dict, work_dir: Path, capture_all: bool =
 
         with open(path, "r") as f:
             autodetect_depedencies = f.read().splitlines()
-            autodetect_depedencies.append("sagemaker>=2.199")
+            autodetect_depedencies.append("sagemaker[huggingface]>=2.199")
     else:
-        autodetect_depedencies = ["sagemaker>=2.199"]
+        autodetect_depedencies = ["sagemaker[huggingface]>=2.199"]
 
     module_version_dict = _parse_dependency_list(autodetect_depedencies)
 
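The change above widens the auto-injected requirement from plain sagemaker to the sagemaker[huggingface] extra, which (per the error message in the hardware detector below) supplies the optional HuggingFace dependencies such as accelerate. As a quick illustration of how an extras requirement string is structured — using the third-party packaging library, which is not part of this diff:

from packaging.requirements import Requirement

# "name[extra]>=version" splits into a package name, a set of
# extras, and a version specifier.
req = Requirement("sagemaker[huggingface]>=2.199")
print(req.name)            # sagemaker
print(sorted(req.extras))  # ['huggingface']
print(str(req.specifier))  # >=2.199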

src/sagemaker/serve/utils/hardware_detector.py

Lines changed: 15 additions & 8 deletions
@@ -18,9 +18,7 @@
 
 from botocore.exceptions import ClientError
 
-from accelerate.commands.estimate import estimate_command_parser, gather_data
 from sagemaker import Session
-from sagemaker.model import Model
 from sagemaker import instance_types_gpu_info
 
 logger = logging.getLogger(__name__)
@@ -116,18 +114,27 @@ def _format_instance_type(instance_type: str) -> str:
     return ec2_instance
 
 
-def _total_inference_model_size_mib(model: Model, dtype: str) -> int:
+def _total_inference_model_size_mib(model: str, dtype: str) -> int:
     """Calculates the model size from HF accelerate
 
     This function gets the model size from accelerate. It also adds a
     padding and converts to size MiB. When performing inference, expect
     to add up to an additional 20% to the given model size as found by EleutherAI.
     """
-    args = estimate_command_parser().parse_args([model, "--dtypes", dtype])
-
-    output = gather_data(
-        args
-    )  # "dtype", "Largest Layer", "Total Size Bytes", "Training using Adam"
+    output = None
+    try:
+        from accelerate.commands.estimate import estimate_command_parser, gather_data
+
+        args = estimate_command_parser().parse_args([model, "--dtypes", dtype])
+
+        output = gather_data(
+            args
+        )  # "dtype", "Largest Layer", "Total Size Bytes", "Training using Adam"
+    except ImportError:
+        logger.error(
+            "To enable Model size calculations: Install HuggingFace extras dependencies "
+            "using pip install 'sagemaker[huggingface]>=2.212.0'"
+        )
 
     if output is None:
         raise ValueError(f"Could not get Model size for {model}")
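The refactor above is a deferred (lazy) optional import: accelerate moves from a module-level import into the function body, so merely importing hardware_detector no longer requires the HuggingFace extras, and a missing dependency is logged and then surfaces through the existing ValueError. A minimal, self-contained sketch of the same pattern (all names here are illustrative, not from this codebase):

import logging

logger = logging.getLogger(__name__)


def size_with_optional_dep(model_id: str) -> int:
    """Sketch: defer an optional heavy import to call time."""
    result = None
    try:
        # Hypothetical optional dependency; imported here so the
        # enclosing module imports cleanly without it.
        from heavy_estimator import estimate

        result = estimate(model_id)
    except ImportError:
        logger.error("Install the optional extra to enable size estimation")

    if result is None:
        raise ValueError(f"Could not get model size for {model_id}")
    return result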

tests/integ/sagemaker/serve/constants.py

Lines changed: 0 additions & 1 deletion
@@ -20,7 +20,6 @@
 SERVE_IN_PROCESS_TIMEOUT = 5
 SERVE_MODEL_PACKAGE_TIMEOUT = 10
 SERVE_LOCAL_CONTAINER_TIMEOUT = 10
-SERVE_LOCAL_CONTAINER_TUNE_TIMEOUT = 15
 SERVE_SAGEMAKER_ENDPOINT_TIMEOUT = 15
 SERVE_SAVE_TIMEOUT = 2
 

tests/integ/sagemaker/serve/test_serve_js_happy.py

Lines changed: 0 additions & 31 deletions
@@ -14,12 +14,10 @@
 
 import pytest
 
-from sagemaker.serve import Mode
 from sagemaker.serve.builder.model_builder import ModelBuilder
 from sagemaker.serve.builder.schema_builder import SchemaBuilder
 from tests.integ.sagemaker.serve.constants import (
     SERVE_SAGEMAKER_ENDPOINT_TIMEOUT,
-    SERVE_LOCAL_CONTAINER_TUNE_TIMEOUT,
     PYTHON_VERSION_IS_NOT_310,
 )
 
@@ -77,32 +75,3 @@ def test_happy_tgi_sagemaker_endpoint(happy_model_builder, gpu_instance_type):
     )
     if caught_ex:
         raise caught_ex
-
-
-@pytest.mark.skipif(
-    PYTHON_VERSION_IS_NOT_310,
-    reason="The goal of these tests are to test the serving components of our feature",
-)
-@pytest.mark.local_mode
-def test_happy_tune_tgi_local_mode(sagemaker_local_session):
-    logger.info("Running in LOCAL_CONTAINER mode...")
-    caught_ex = None
-
-    model_builder = ModelBuilder(
-        model="huggingface-llm-bilingual-rinna-4b-instruction-ppo-bf16",
-        schema_builder=SchemaBuilder(SAMPLE_PROMPT, SAMPLE_RESPONSE),
-        mode=Mode.LOCAL_CONTAINER,
-        sagemaker_session=sagemaker_local_session,
-    )
-
-    model = model_builder.build()
-
-    with timeout(minutes=SERVE_LOCAL_CONTAINER_TUNE_TIMEOUT):
-        try:
-            tuned_model = model.tune()
-            assert tuned_model.env is not None
-        except Exception as e:
-            caught_ex = e
-        finally:
-            if caught_ex:
-                raise caught_ex

tests/integ/sagemaker/serve/test_serve_pt_happy.py

Lines changed: 2 additions & 3 deletions
@@ -10,6 +10,7 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
+# flake8: noqa: F631
 from __future__ import absolute_import
 
 import pytest
@@ -221,10 +222,8 @@ def test_happy_pytorch_sagemaker_endpoint(
     )
     if caught_ex:
         logger.exception(caught_ex)
-        ignore_if_worker_dies = "Worker died." in str(caught_ex)
-        # https://github.com/pytorch/serve/issues/3032
         assert (
-            ignore_if_worker_dies
+            False,
         ), f"{caught_ex} was thrown when running pytorch squeezenet sagemaker endpoint test"
 
 
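A note on the # flake8: noqa: F631 added at the top of this file: after the edit, the assert target is the one-element tuple (False,), and a non-empty tuple is always truthy, so the assertion never fires (the exception is only logged via logger.exception above it). flake8 flags exactly this pattern as F631, hence the file-level suppression. A two-line demonstration of the gotcha:

# F631: asserting on a non-empty tuple is always true
# (CPython also emits a SyntaxWarning for it).
assert (False,), "never raised - the tuple (False,) is truthy"

# Without the trailing comma this is an ordinary assert and fails:
# assert (False), "raised immediately"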

tests/unit/sagemaker/serve/detector/test_dependency_manager.py

Lines changed: 1 addition & 1 deletion
@@ -99,7 +99,7 @@ def test_capture_dependencies(self, mock_subprocess, mock_file, mock_path):
             call("custom_module==1.2.3\n"),
             call("numpy==4.5\n"),
             call("boto3=1.28.*\n"),
-            call("sagemaker>=2.199\n"),
+            call("sagemaker[huggingface]>=2.199\n"),
             call("other_module@http://some/website.whl\n"),
         ]
         mocked_writes.assert_has_calls(expected_calls)
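For readers unfamiliar with the assertion used here: assert_has_calls checks that the listed calls appear, in order, somewhere in the mock's call history (other calls may be interleaved). A self-contained sketch of the same idea, independent of this test suite:

from unittest.mock import mock_open, patch, call

m = mock_open()
with patch("builtins.open", m):
    with open("requirements.txt", "w") as f:
        f.write("numpy==4.5\n")
        f.write("sagemaker[huggingface]>=2.199\n")

# Passes: both writes occurred, in this order.
m().write.assert_has_calls([
    call("numpy==4.5\n"),
    call("sagemaker[huggingface]>=2.199\n"),
])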

tests/unit/sagemaker/serve/utils/test_hardware_detector.py

Lines changed: 6 additions & 2 deletions
@@ -101,8 +101,8 @@ def test_format_instance_type_without_ml_success():
     assert formatted_instance_type == "g5.48xlarge"
 
 
-@patch("sagemaker.serve.utils.hardware_detector.estimate_command_parser")
-@patch("sagemaker.serve.utils.hardware_detector.gather_data")
+@patch("accelerate.commands.estimate.estimate_command_parser")
+@patch("accelerate.commands.estimate.gather_data")
 def test_total_inference_model_size_mib(
     mock_gather_data,
     mock_parser,
@@ -120,3 +120,7 @@ def test_total_inference_model_size_mib(
 
     with pytest.raises(ValueError):
         hardware_detector._total_inference_model_size_mib("stable-diffusion", "float32")
+
+    mock_parser.side_effect = ImportError
+    with pytest.raises(ValueError):
+        hardware_detector._total_inference_model_size_mib("stable-diffusion", "float32")
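The decorator targets change because of the deferred import: estimate_command_parser and gather_data are no longer attributes of sagemaker.serve.utils.hardware_detector, so unittest.mock.patch must target them in the module where they are defined, which is where a call-time import looks them up. A runnable sketch of the general rule, using the standard-library json module purely for illustration:

from unittest.mock import patch


def serialize(obj):
    # Deferred import, mirroring _total_inference_model_size_mib.
    import json

    return json.dumps(obj)


# This module has no top-level "dumps" name to patch, so patch the
# attribute on its source module instead.
with patch("json.dumps", return_value="stubbed") as mock_dumps:
    assert serialize({"a": 1}) == "stubbed"
    mock_dumps.assert_called_once_with({"a": 1})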
