Skip to content

Commit 0875efc

Browse files
benieric authored and pintaoz-aws committed
Update hyperpod recipe uris (#1629)
1 parent 02cc559 commit 0875efc

File tree

4 files changed

+18
-6
lines changed

4 files changed

+18
-6
lines changed
Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
{
2-
"adapter_repo": "git@github.com:aws/private-sagemaker-hyperpod-training-adapter-for-nemo-staging.git",
3-
"launcher_repo": "git@github.com:aws/private-sagemaker-hyperpod-recipes-staging.git",
2+
"adapter_repo": "https://github.com/aws/sagemaker-training-adapter-for-nemo.git",
3+
"launcher_repo": "https://github.com/aws/sagemaker-hyperpod-recipes.git",
44
"neuron_dist_repo": "https://github.com/aws-neuron/neuronx-distributed-training.git",
55
"gpu_image" : {
66
"framework": "pytorch-smp",
7-
"version": "2.3.1",
7+
"version": "2.4.1",
88
"additional_args": {}
99
},
10-
"neuron_image": "855988369404.dkr.ecr.us-west-2.amazonaws.com/chinmayee-dev:neuron_sept26_v1"
10+
"neuron_image": {
11+
"framework": "hyperpod-recipes-neuron",
12+
"version": "2.1.2",
13+
"additional_args": {}
14+
}
1115
}

src/sagemaker/modules/train/sm_recipes/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def _load_base_recipe(
8787
else:
8888
recipe_launcher_dir = tempfile.TemporaryDirectory(prefix="launcher_")
8989

90-
launcher_repo = os.environ.get("training_launcher_git", None) or training_recipes_cfg.get(
90+
launcher_repo = os.environ.get("TRAINING_LAUNCHER_GIT", None) or training_recipes_cfg.get(
9191
"launcher_repo"
9292
)
9393
_run_clone_command_silent(launcher_repo, recipe_launcher_dir.name)
@@ -135,7 +135,7 @@ def _configure_gpu_args(
135135
source_code = SourceCode()
136136
args = dict()
137137

138-
adapter_repo = os.environ.get("training_adapter_git", None) or training_recipes_cfg.get(
138+
adapter_repo = os.environ.get("TRAINING_ADAPTER_GIT", None) or training_recipes_cfg.get(
139139
"adapter_repo"
140140
)
141141
_run_clone_command_silent(adapter_repo, recipe_train_dir.name)

tests/unit/sagemaker/modules/train/sm_recipes/test_utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ def temporary_recipe():
4848
yield f.name
4949

5050

51+
# TODO: To be removed on 12/5/2024
52+
@pytest.mark.skip(reason="Hyperpod recipe code unavailable")
5153
def test_load_base_recipe_with_overrides(temporary_recipe, training_recipes_cfg):
5254
expected_epochs = 20
5355
expected_layers = 15
@@ -69,6 +71,8 @@ def test_load_base_recipe_with_overrides(temporary_recipe, training_recipes_cfg)
6971
)
7072

7173

74+
# TODO: To be removed on 12/5/2024
75+
@pytest.mark.skip(reason="Hyperpod recipe code unavailable")
7276
@pytest.mark.parametrize(
7377
"test_case",
7478
[
@@ -126,6 +130,8 @@ def test_load_base_recipe_types(
126130
assert mock_retrieve.call_args.args[0] == url
127131

128132

133+
# TODO: To be removed on 12/5/2024
134+
@pytest.mark.skip(reason="Hyperpod recipe code unavailable")
129135
@pytest.mark.parametrize(
130136
"test_case",
131137
[

tests/unit/sagemaker/modules/train/test_model_trainer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -810,6 +810,8 @@ def mock_upload_data(path, bucket, key_prefix):
810810
)
811811

812812

813+
# TODO: To be removed on 12/5/2024
814+
@pytest.mark.skip(reason="Hyperpod recipe code unavailable")
813815
def test_model_trainer_gpu_recipe_full_init(modules_session):
814816
training_recipe = "training/llama/p4_hf_llama3_70b_seq8k_gpu"
815817
recipe_overrides = {"run": {"results_dir": "/opt/ml/model"}}

0 commit comments

Comments
 (0)