
fix: ModelBuilder not passing HF_TOKEN to model. #4780


Merged · 8 commits · Jul 11, 2024
14 changes: 5 additions & 9 deletions src/sagemaker/serve/builder/jumpstart_builder.py
@@ -669,7 +669,6 @@ def _optimize_for_jumpstart(
         self,
         output_path: Optional[str] = None,
         instance_type: Optional[str] = None,
-        role_arn: Optional[str] = None,
         tags: Optional[Tags] = None,
         job_name: Optional[str] = None,
         accept_eula: Optional[bool] = None,
@@ -685,9 +684,7 @@ def _optimize_for_jumpstart(

         Args:
             output_path (Optional[str]): Specifies where to store the compiled/quantized model.
-            instance_type (Optional[str]): Target deployment instance type that
-                the model is optimized for.
-            role_arn (Optional[str]): Execution role. Defaults to ``None``.
+            instance_type (str): Target deployment instance type that the model is optimized for.
             tags (Optional[Tags]): Tags for labeling a model optimization job. Defaults to ``None``.
             job_name (Optional[str]): The name of the model optimization job. Defaults to ``None``.
             accept_eula (bool): For models that require a Model Access Config, specify True or
@@ -715,7 +712,7 @@ def _optimize_for_jumpstart(
f"Model '{self.model}' requires accepting end-user license agreement (EULA)."
)

is_compilation = (quantization_config is None) and (
is_compilation = (not quantization_config) and (
(compilation_config is not None) or _is_inferentia_or_trainium(instance_type)
)

@@ -758,7 +755,6 @@ def _optimize_for_jumpstart(
             else None
         )
         self.instance_type = instance_type or deployment_config_instance_type or _get_nb_instance()
-        self.role_arn = role_arn or self.role_arn

         create_optimization_job_args = {
             "OptimizationJobName": job_name,
@@ -787,10 +783,10 @@ def _optimize_for_jumpstart(
"AcceptEula": True
}

optimization_env_vars = _update_environment_variables(optimization_env_vars, override_env)
if optimization_env_vars:
self.pysdk_model.env.update(optimization_env_vars)
if quantization_config or is_compilation:
self.pysdk_model.env = _update_environment_variables(
optimization_env_vars, override_env
)
return create_optimization_job_args
return None

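Two behavioral notes on the jumpstart_builder.py hunks above: `not quantization_config` treats an empty dict the same as `None`, and the merged optimization environment variables are now applied to the model whenever any exist, instead of only when a quantization or compilation job is created. A runnable sketch of the merge-then-update pattern, with a simplified stand-in body for `_update_environment_variables` (the real helper lives in `optimize_utils.py`; the values below are illustrative):

```python
from typing import Dict, Optional

def _update_environment_variables(
    env: Optional[Dict[str, str]], override: Optional[Dict[str, str]]
) -> Optional[Dict[str, str]]:
    # Simplified stand-in for the helper in optimize_utils.py:
    # override wins on key collisions, and None inputs are tolerated.
    if env and override:
        return {**env, **override}
    return env or override

model_env: Dict[str, str] = {"SAGEMAKER_PROGRAM": "inference.py"}  # pre-existing model env
optimization_env_vars = {"OPTION_TENSOR_PARALLEL_DEGREE": "2"}     # illustrative values
override_env = {"OPTION_DTYPE": "fp16"}

merged = _update_environment_variables(optimization_env_vars, override_env)
if merged:
    # New behavior: update in place whenever merged vars exist. The old code
    # assigned a whole new dict, and only when an optimization job was built.
    model_env.update(merged)
print(model_env)
```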
36 changes: 13 additions & 23 deletions src/sagemaker/serve/builder/model_builder.py
@@ -73,7 +73,6 @@
     _generate_model_source,
     _extract_optimization_config_and_env,
     _is_s3_uri,
-    _normalize_local_model_path,
     _custom_speculative_decoding,
     _extract_speculative_draft_model_provider,
 )
@@ -833,6 +832,8 @@ def build( # pylint: disable=R0911
         # until we deprecate HUGGING_FACE_HUB_TOKEN.
         if self.env_vars.get("HUGGING_FACE_HUB_TOKEN") and not self.env_vars.get("HF_TOKEN"):
             self.env_vars["HF_TOKEN"] = self.env_vars.get("HUGGING_FACE_HUB_TOKEN")
+        elif self.env_vars.get("HF_TOKEN") and not self.env_vars.get("HUGGING_FACE_HUB_TOKEN"):
+            self.env_vars["HUGGING_FACE_HUB_TOKEN"] = self.env_vars.get("HF_TOKEN")

         self.sagemaker_session.settings._local_download_dir = self.model_path
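This `elif` is the fix named in the PR title: the builder previously copied only `HUGGING_FACE_HUB_TOKEN` into `HF_TOKEN`, so a caller who set just `HF_TOKEN` shipped a model with no token under the legacy name. A self-contained sketch of the two-way mirroring (plain dict here; in the SDK this operates on `ModelBuilder.env_vars`):

```python
def _mirror_hf_tokens(env_vars: dict) -> dict:
    """Keep HF_TOKEN and the legacy HUGGING_FACE_HUB_TOKEN in sync,
    whichever of the two the caller provided."""
    if env_vars.get("HUGGING_FACE_HUB_TOKEN") and not env_vars.get("HF_TOKEN"):
        env_vars["HF_TOKEN"] = env_vars["HUGGING_FACE_HUB_TOKEN"]
    elif env_vars.get("HF_TOKEN") and not env_vars.get("HUGGING_FACE_HUB_TOKEN"):
        env_vars["HUGGING_FACE_HUB_TOKEN"] = env_vars["HF_TOKEN"]
    return env_vars

# Either spelling now yields both variables on the model:
assert _mirror_hf_tokens({"HF_TOKEN": "hf_example"})["HUGGING_FACE_HUB_TOKEN"] == "hf_example"
assert _mirror_hf_tokens({"HUGGING_FACE_HUB_TOKEN": "hf_example"})["HF_TOKEN"] == "hf_example"
```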
@@ -851,7 +852,9 @@ def build( # pylint: disable=R0911

         self._build_validations()

-        if not self._is_jumpstart_model_id() and self.model_server:
+        if (
+            not (isinstance(self.model, str) and self._is_jumpstart_model_id())
+        ) and self.model_server:
             return self._build_for_model_server()

         if isinstance(self.model, str):
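The added `isinstance(self.model, str)` guard accounts for `self.model` being a model object rather than a model-ID string; JumpStart detection is only meaningful for strings. A minimal sketch of the routing predicate (the helper name and signature below are invented for illustration):

```python
def _routes_to_model_server(model, model_server, is_jumpstart_model_id) -> bool:
    # JumpStart handling only applies to string model IDs; anything else
    # with an explicit model server goes to _build_for_model_server().
    is_jumpstart = isinstance(model, str) and is_jumpstart_model_id(model)
    return (not is_jumpstart) and (model_server is not None)

class FakeModel:  # stands in for an in-memory model object
    pass

assert _routes_to_model_server(FakeModel(), "DJL_SERVING", lambda m: False)
assert not _routes_to_model_server("huggingface-llm-falcon-7b", "DJL_SERVING", lambda m: True)
```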
Expand Down Expand Up @@ -1216,18 +1219,15 @@ def _model_builder_optimize_wrapper(
             raise ValueError("Quantization config and compilation config are mutually exclusive.")

         self.sagemaker_session = sagemaker_session or self.sagemaker_session or Session()
-
         self.instance_type = instance_type or self.instance_type
         self.role_arn = role_arn or self.role_arn

-        self.build(mode=self.mode, sagemaker_session=self.sagemaker_session)
         job_name = job_name or f"modelbuilderjob-{uuid.uuid4().hex}"
-
         if self._is_jumpstart_model_id():
+            self.build(mode=self.mode, sagemaker_session=self.sagemaker_session)
             input_args = self._optimize_for_jumpstart(
                 output_path=output_path,
                 instance_type=instance_type,
-                role_arn=self.role_arn,
                 tags=tags,
                 job_name=job_name,
                 accept_eula=accept_eula,
                 ...
                 max_runtime_in_sec=max_runtime_in_sec,
             )
         else:
+            if self.model_server != ModelServer.DJL_SERVING:
+                logger.info("Overriding model server to DJL_SERVING.")
+                self.model_server = ModelServer.DJL_SERVING
+
+            self.build(mode=self.mode, sagemaker_session=self.sagemaker_session)
             input_args = self._optimize_for_hf(
                 output_path=output_path,
-                instance_type=instance_type,
-                role_arn=self.role_arn,
                 tags=tags,
                 job_name=job_name,
                 quantization_config=quantization_config,
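Two structural changes land here: `self.build(...)` moves from the shared path into each branch, and the DJL override, previously applied inside `_optimize_for_hf` (see the deletion further down), now runs before the build, so the built model already targets `ModelServer.DJL_SERVING`. A runnable toy model of why that ordering matters (class and attribute names are invented for the sketch):

```python
from enum import Enum

class ModelServer(Enum):
    DJL_SERVING = "djl_serving"
    TORCHSERVE = "torchserve"

class OptimizeWrapperSketch:
    """Toy model of the reordered wrapper; not the SDK's actual class."""

    def __init__(self, model_server: ModelServer, is_jumpstart: bool):
        self.model_server = model_server
        self.is_jumpstart = is_jumpstart
        self.built_with = None  # records which server build() saw

    def build(self) -> None:
        # build() bakes in whatever server is configured at call time.
        self.built_with = self.model_server

    def optimize(self) -> None:
        if self.is_jumpstart:
            self.build()  # JumpStart branch: build, then optimize for JumpStart
        else:
            # Override *before* building, unlike the old code where
            # _optimize_for_hf flipped the server after build() had run.
            if self.model_server != ModelServer.DJL_SERVING:
                self.model_server = ModelServer.DJL_SERVING
            self.build()  # HF branch: the build already targets DJL

b = OptimizeWrapperSketch(ModelServer.TORCHSERVE, is_jumpstart=False)
b.optimize()
assert b.built_with is ModelServer.DJL_SERVING
```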
@@ -1269,8 +1272,6 @@
     def _optimize_for_hf(
         self,
         output_path: str,
-        instance_type: Optional[str] = None,
-        role_arn: Optional[str] = None,
         tags: Optional[Tags] = None,
         job_name: Optional[str] = None,
         quantization_config: Optional[Dict] = None,
@@ -1285,9 +1286,6 @@ def _optimize_for_hf(

         Args:
             output_path (str): Specifies where to store the compiled/quantized model.
-            instance_type (Optional[str]): Target deployment instance type that
-                the model is optimized for.
-            role_arn (Optional[str]): Execution role. Defaults to ``None``.
             tags (Optional[Tags]): Tags for labeling a model optimization job. Defaults to ``None``.
             job_name (Optional[str]): The name of the model optimization job. Defaults to ``None``.
             quantization_config (Optional[Dict]): Quantization configuration. Defaults to ``None``.
@@ -1305,13 +1303,6 @@ def _optimize_for_hf(
         Returns:
             Optional[Dict[str, Any]]: Model optimization job input arguments.
         """
-        if self.model_server != ModelServer.DJL_SERVING:
-            logger.info("Overwriting model server to DJL.")
-            self.model_server = ModelServer.DJL_SERVING
-
-        self.role_arn = role_arn or self.role_arn
-        self.instance_type = instance_type or self.instance_type
-
         self.pysdk_model = _custom_speculative_decoding(
             self.pysdk_model, speculative_decoding_config, False
         )
@@ -1371,13 +1362,12 @@ def _optimize_prepare_for_hf(self):
             )
         else:
             if not custom_model_path:
-                custom_model_path = f"/tmp/sagemaker/model-builder/{self.model}/code"
+                custom_model_path = f"/tmp/sagemaker/model-builder/{self.model}"
             download_huggingface_model_metadata(
                 self.model,
-                custom_model_path,
+                os.path.join(custom_model_path, "code"),
                 self.env_vars.get("HUGGING_FACE_HUB_TOKEN"),
             )
-        custom_model_path = _normalize_local_model_path(custom_model_path)

         self.pysdk_model.model_data, env = self._prepare_for_mode(
             model_path=custom_model_path,
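This pairs with the removal of `_normalize_local_model_path` below: metadata now downloads into a `code/` subdirectory while the parent directory is what `_prepare_for_mode` uploads, so there is no trailing `/code` left to strip. A small sketch of the assumed layout (paths illustrative):

```python
import os

model_id = "org/model"  # illustrative Hugging Face model ID
custom_model_path = f"/tmp/sagemaker/model-builder/{model_id}"  # new default, no /code suffix
download_target = os.path.join(custom_model_path, "code")       # where metadata now lands

# _prepare_for_mode(model_path=custom_model_path, ...) uploads the parent
# directory, so the old regex-based "/code" suffix stripping is dead code.
assert download_target == "/tmp/sagemaker/model-builder/org/model/code"
assert not custom_model_path.endswith("/code")
```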
20 changes: 0 additions & 20 deletions src/sagemaker/serve/utils/optimize_utils.py
@@ -282,26 +282,6 @@ def _extract_optimization_config_and_env(
     return None, None


-def _normalize_local_model_path(local_model_path: Optional[str]) -> Optional[str]:
-    """Normalizes the local model path.
-
-    Args:
-        local_model_path (Optional[str]): The local model path.
-
-    Returns:
-        Optional[str]: The normalized model path.
-    """
-    if local_model_path is None:
-        return local_model_path
-
-    # Removes /code or /code/ path at the end of local_model_path,
-    # as it is appended during artifacts upload.
-    pattern = r"/code/?$"
-    if re.search(pattern, local_model_path):
-        return re.sub(pattern, "", local_model_path)
-    return local_model_path
-
-
 def _custom_speculative_decoding(
     model: Model,
     speculative_decoding_config: Optional[Dict],