Skip to content

feature: Add and use sagemaker_schema_inference_artifacts dependency for huggingface in schema builder (question-answering only) #4554

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements/extras/huggingface_requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
accelerate>=0.24.1,<=0.27.0
sagemaker_schema_inference_artifacts>=0.0.2
17 changes: 12 additions & 5 deletions src/sagemaker/serve/builder/model_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
)
from sagemaker.workflow.entities import PipelineVariable
from sagemaker.huggingface.llm_utils import get_huggingface_model_metadata
from sagemaker_schema_inference_artifacts.huggingface import remote_schema_retriever
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would suggest moving this import to be in the try catch block where we use remote_schema_retriever and catching an import error there if module is not installed.

The reason for the suggestion is: these are extra dependencies, and not all customers want to opt in to use this feature. Example: #4549


logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -637,7 +638,7 @@ def build( # pylint: disable=R0911
if model_task is None:
model_task = hf_model_md.get("pipeline_tag")
if self.schema_builder is None and model_task is not None:
self._schema_builder_init(model_task)
self._hf_schema_builder_init(model_task)
if model_task == "text-generation": # pylint: disable=R1705
return self._build_for_tgi()
elif self._can_fit_on_single_gpu():
Expand Down Expand Up @@ -704,8 +705,8 @@ def validate(self, model_dir: str) -> Type[bool]:

return get_metadata(model_dir)

def _hf_schema_builder_init(self, model_task: str):
    """Initialize the schema builder for the given HF_TASK.

    Tries the locally bundled task schema samples first; if the task is not
    known locally, falls back to the remote schemas published in
    ``sagemaker_schema_inference_artifacts``.

    Args:
        model_task (str): Required, the task name

    Raises:
        TaskNotFoundException: If the I/O schema for the given task is not
            found locally or via the remote fallback, or if the optional
            remote-schema dependency is not installed.
    """
    try:
        try:
            sample_inputs, sample_outputs = task.retrieve_local_schemas(model_task)
        except ValueError:
            # Samples could not be loaded locally; fall back to
            # sagemaker_schema_inference_artifacts. Imported lazily because it
            # is an optional ("huggingface" extras) dependency and customers
            # who never hit this fallback should not be forced to install it.
            try:
                from sagemaker_schema_inference_artifacts.huggingface import (
                    remote_schema_retriever,
                )
            except ImportError as e:
                raise TaskNotFoundException(
                    "HuggingFace schema artifacts are not installed. Install the "
                    "'huggingface' extras dependency "
                    "(sagemaker_schema_inference_artifacts) to enable the remote "
                    "schema fallback."
                ) from e
            remote_hf_schema_helper = remote_schema_retriever.RemoteSchemaRetriever()
            sample_inputs, sample_outputs = (
                remote_hf_schema_helper.get_resolved_hf_schema_for_task(model_task)
            )
        self.schema_builder = SchemaBuilder(sample_inputs, sample_outputs)
    except ValueError as e:
        # Neither the local samples nor the remote retriever recognized the task.
        raise TaskNotFoundException(
            f"HuggingFace Schema builder samples for {model_task} could not be found "
            f"locally or via remote."
        ) from e

def _can_fit_on_single_gpu(self) -> Type[bool]:
"""Check if model can fit on a single GPU
Expand Down
79 changes: 71 additions & 8 deletions tests/integ/sagemaker/serve/test_schema_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
import pytest

from sagemaker.serve.utils.exceptions import TaskNotFoundException
from sagemaker_schema_inference_artifacts.huggingface import remote_schema_retriever
from sagemaker.serve.builder.schema_builder import SchemaBuilder
from tests.integ.sagemaker.serve.constants import (
PYTHON_VERSION_IS_NOT_310,
SERVE_SAGEMAKER_ENDPOINT_TIMEOUT,
Expand Down Expand Up @@ -49,7 +51,7 @@ def test_model_builder_happy_path_with_only_model_id_fill_mask(sagemaker_session
reason="Testing Schema Builder Simplification feature",
)
def test_model_builder_happy_path_with_only_model_id_question_answering(
sagemaker_session, gpu_instance_type
sagemaker_session, gpu_instance_type
):
model_builder = ModelBuilder(model="bert-large-uncased-whole-word-masking-finetuned-squad")

Expand Down Expand Up @@ -92,18 +94,19 @@ def test_model_builder_happy_path_with_only_model_id_question_answering(


def test_model_builder_negative_path(sagemaker_session):
    """Building a model whose task has no I/O schema must raise TaskNotFoundException."""
    # A model-task combo unsupported by both the local and remote schema
    # fallback options (eg: text-to-video).
    model_builder = ModelBuilder(model="ByteDance/AnimateDiff-Lightning")

    with pytest.raises(
        TaskNotFoundException,
        # `match` is applied as a regex against the exception message.
        match="Error Message: HuggingFace Schema builder samples for text-to-video could not be found locally or via "
        "remote.",
    ):
        model_builder.build(sagemaker_session=sagemaker_session)


@pytest.mark.skipif(
PYTHON_VERSION_IS_NOT_310,
reason="Testing Schema Builder Simplification feature",
reason="Testing Schema Builder Simplification feature - Local Schema fallback",
)
@pytest.mark.parametrize(
"model_id, task_provided",
Expand All @@ -112,8 +115,8 @@ def test_model_builder_negative_path(sagemaker_session):
("bert-large-uncased-whole-word-masking-finetuned-squad", "question-answering"),
],
)
def test_model_builder_happy_path_with_task_provided(
model_id, task_provided, sagemaker_session, gpu_instance_type
def test_model_builder_happy_path_with_task_provided_local_schema_mode(
model_id, task_provided, sagemaker_session, gpu_instance_type
):
model_builder = ModelBuilder(model=model_id, model_metadata={"HF_TASK": task_provided})

Expand Down Expand Up @@ -154,6 +157,65 @@ def test_model_builder_happy_path_with_task_provided(
False
), f"{caught_ex} was thrown when running transformers sagemaker endpoint test"

@pytest.mark.skipif(
PYTHON_VERSION_IS_NOT_310,
reason="Testing Schema Builder Simplification feature - Remote Schema fallback",
)
@pytest.mark.parametrize(
"model_id, task_provided, gpu_instance_type",
[
("bert-large-uncased-whole-word-masking-finetuned-squad", "question-answering", "ml.m5.xlarge"),
],
)
def test_model_builder_happy_path_with_task_provided_remote_schema_mode(
model_id, task_provided, sagemaker_session, gpu_instance_type
):
    """End-to-end: build with HF_TASK metadata, swap in remote-retrieved schema
    samples, deploy to a SageMaker endpoint, and predict with those samples."""

model_builder = ModelBuilder(model=model_id,
model_metadata={"HF_TASK": task_provided},
instance_type=gpu_instance_type)
model = model_builder.build(sagemaker_session=sagemaker_session)

assert model is not None
assert model_builder.schema_builder is not None

# Fetch the remote sample I/O for the task directly, to compare against /
# install into the builder below.
remote_hf_schema_helper = remote_schema_retriever.RemoteSchemaRetriever()
inputs, outputs = remote_hf_schema_helper.get_resolved_hf_schema_for_task(task_provided)

if task_provided == "question-answering":
# Override model builder to use remote hf schema samples for question-answering for this e2e test.
model_builder.schema_builder = SchemaBuilder(inputs, outputs)

assert model_builder.schema_builder.sample_input == inputs
assert model_builder.schema_builder.sample_output == outputs

# Deploy and predict; any exception is captured so cleanup always runs,
# then re-surfaced as a test failure.
with timeout(minutes=SERVE_SAGEMAKER_ENDPOINT_TIMEOUT):
caught_ex = None
try:
iam_client = sagemaker_session.boto_session.client("iam")
role_arn = iam_client.get_role(RoleName="SageMakerRole")["Role"]["Arn"]

logger.info("Deploying and predicting in SAGEMAKER_ENDPOINT mode...")
predictor = model.deploy(
role=role_arn, instance_count=1, instance_type=gpu_instance_type
)

# Predict with the remote sample input; only non-None is asserted since
# the exact model output is not pinned here.
predicted_outputs = predictor.predict(inputs)
assert predicted_outputs is not None

except Exception as e:
caught_ex = e
finally:
# Always tear down the model/endpoint, even on failure, to avoid
# leaking billable resources.
cleanup_model_resources(
sagemaker_session=model_builder.sagemaker_session,
model_name=model.name,
endpoint_name=model.endpoint_name,
)
if caught_ex:
logger.exception(caught_ex)
assert (
False
), f"{caught_ex} was thrown when running transformers sagemaker endpoint test"

def test_model_builder_negative_path_with_invalid_task(sagemaker_session):
model_builder = ModelBuilder(
Expand All @@ -162,6 +224,7 @@ def test_model_builder_negative_path_with_invalid_task(sagemaker_session):

with pytest.raises(
TaskNotFoundException,
match="Error Message: Schema builder for invalid-task could not be found.",
match="Error Message: HuggingFace Schema builder samples for invalid-task could not be found locally or via "
"remote.",
):
model_builder.build(sagemaker_session=sagemaker_session)
8 changes: 5 additions & 3 deletions tests/unit/sagemaker/serve/builder/test_model_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -1062,7 +1062,7 @@ def test_build_negative_path_when_schema_builder_not_present(

# HF Pipeline Tag
mock_model_uris_retrieve.side_effect = KeyError
mock_llm_utils_json.load.return_value = {"pipeline_tag": "text-to-image"}
mock_llm_utils_json.load.return_value = {"pipeline_tag": "unsupported-task"}
mock_llm_utils_urllib.request.Request.side_effect = Mock()

# HF Model config
Expand All @@ -1075,7 +1075,8 @@ def test_build_negative_path_when_schema_builder_not_present(

self.assertRaisesRegex(
TaskNotFoundException,
"Error Message: Schema builder for text-to-image could not be found.",
"Error Message: HuggingFace Schema builder samples for unsupported-task could not be found locally or via "
"remote.",
lambda: model_builder.build(sagemaker_session=mock_session),
)

Expand Down Expand Up @@ -1627,7 +1628,8 @@ def test_build_task_override_with_invalid_task_provided(

self.assertRaisesRegex(
TaskNotFoundException,
f"Error Message: Schema builder for {provided_task} could not be found.",
f"Error Message: HuggingFace Schema builder samples for {provided_task} could not be found locally or "
f"via remote.",
lambda: model_builder.build(sagemaker_session=mock_session),
)

Expand Down