Commit 106117d

Test PR to Fix and Trigger V2 tests (#5388)
* Fix PR check
* Fix two failed unit tests with proper mocking
* Add back the restored workflow files
* Retrigger tests

Co-authored-by: jzhaoqwa <[email protected]>

1 parent: 8845681 · Commit: 106117d

File tree

5 files changed: +101 −16 lines

doc/doc_utils/jumpstart_doc_utils.py

Lines changed: 3 additions & 2 deletions
@@ -12,6 +12,7 @@
 # language governing permissions and limitations under the License.
 from __future__ import absolute_import
 from urllib import request
+from urllib.parse import quote
 import json
 from packaging.version import Version
 from enum import Enum
@@ -170,11 +171,11 @@ def get_proprietary_sdk_manifest():
 
 
 def get_jumpstart_sdk_spec(s3_key: str):
-    return get_public_s3_json_object(f"{JUMPSTART_BUCKET_BASE_URL}/{s3_key}")
+    return get_public_s3_json_object(f"{JUMPSTART_BUCKET_BASE_URL}/{quote(s3_key)}")
 
 
 def get_proprietary_sdk_spec(s3_key: str):
-    return get_public_s3_json_object(f"{PROPRIETARY_DOC_BUCKET}/{s3_key}")
+    return get_public_s3_json_object(f"{PROPRIETARY_DOC_BUCKET}/{quote(s3_key)}")
 
 
 def get_model_task(id):
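The fix wraps each S3 key in urllib.parse.quote before interpolating it into the request URL, so keys containing spaces or other URL-reserved characters produce a valid request instead of a malformed one. A minimal sketch of the difference (the base URL and key below are made-up examples, not values from this codebase):

from urllib.parse import quote

# Hypothetical stand-ins for JUMPSTART_BUCKET_BASE_URL and a real S3 key.
BASE_URL = "https://example-bucket.s3.amazonaws.com"
s3_key = "proprietary-models/Example Model+v1/spec.json"

print(f"{BASE_URL}/{s3_key}")
# .../proprietary-models/Example Model+v1/spec.json   <- space and '+' break the URL
print(f"{BASE_URL}/{quote(s3_key)}")
# .../proprietary-models/Example%20Model%2Bv1/spec.json

By default quote() leaves "/" unescaped, so multi-segment keys like the one above keep their path structure.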

src/sagemaker/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -63,7 +63,7 @@
 from sagemaker.automl.automl import AutoML, AutoMLJob, AutoMLInput  # noqa: F401
 from sagemaker.automl.candidate_estimator import CandidateEstimator, CandidateStep  # noqa: F401
 from sagemaker.automl.automlv2 import (  # noqa: F401
-    AutoMLV2,
+    AutoMLV2,
     AutoMLJobV2,
     LocalAutoMLDataChannel,
     AutoMLDataChannel,

(The removed and added lines render identically here; the change is whitespace-only.)

tests/unit/sagemaker/modules/train/sm_recipes/test_utils.py

Lines changed: 30 additions & 9 deletions
@@ -109,15 +109,36 @@ def test_load_base_recipe_types(
         assert "trainer" in load_recipe
 
     if recipe_type == "sagemaker":
-        mock_clone.side_effect = _run_clone_command_silent
-        load_recipe = _load_base_recipe(
-            training_recipe="training/llama/p4_hf_llama3_70b_seq8k_gpu",
-            recipe_overrides=None,
-            training_recipes_cfg=training_recipes_cfg,
-        )
-        assert load_recipe is not None
-        assert "trainer" in load_recipe
-        assert mock_clone.call_args.args[0] == training_recipes_cfg.get("launcher_repo")
+        # Mock the file check to simulate recipe exists
+        with patch("os.path.isfile", return_value=True), \
+                patch("shutil.copy") as mock_copy:
+            # Create a temporary recipe file for the copy operation
+            import tempfile
+            import os
+            with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f:
+                yaml.dump({"trainer": {"num_nodes": 1}}, f)
+                temp_path = f.name
+
+            def copy_side_effect(src, dst):
+                # Read from temp file and write to destination
+                with open(temp_path, 'r') as src_file:
+                    content = src_file.read()
+                with open(dst, 'w') as dst_file:
+                    dst_file.write(content)
+
+            mock_copy.side_effect = copy_side_effect
+
+            load_recipe = _load_base_recipe(
+                training_recipe="training/llama/p4_hf_llama3_70b_seq8k_gpu",
+                recipe_overrides=None,
+                training_recipes_cfg=training_recipes_cfg,
+            )
+            assert load_recipe is not None
+            assert "trainer" in load_recipe
+            assert mock_clone.call_args.args[0] == training_recipes_cfg.get("launcher_repo")
+
+            # Clean up
+            os.unlink(temp_path)
 
     if recipe_type == "url":
         url = "https://raw.githubusercontent.com/aws-neuron/neuronx-distributed-training/refs/heads/main/examples/conf/hf_llama3_8B_config.yaml"  # noqa

tests/unit/sagemaker/modules/train/test_model_trainer.py

Lines changed: 10 additions & 1 deletion
@@ -931,7 +931,16 @@ def mock_upload_data(path, bucket, key_prefix):
     )
 
 
-def test_model_trainer_gpu_recipe_full_init(modules_session):
+@patch("sagemaker.modules.train.model_trainer._load_base_recipe")
+def test_model_trainer_gpu_recipe_full_init(mock_load_recipe, modules_session):
+    from omegaconf import OmegaConf
+
+    # Mock the recipe loading to return a valid GPU recipe structure
+    mock_load_recipe.return_value = OmegaConf.create({
+        "trainer": {"num_nodes": 2},
+        "model": {"model_type": "llama_v3"},
+    })
+
     training_recipe = "training/llama/p4_hf_llama3_70b_seq8k_gpu"
     recipe_overrides = {"run": {"results_dir": "/opt/ml/model"}}
     compute = Compute(instance_type="ml.p4d.24xlarge", instance_count="2")
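Returning an OmegaConf object rather than a plain dict keeps the mock faithful to what _load_base_recipe presumably produces, since a DictConfig supports both attribute- and key-style access. A quick illustration using the same values as the test above:

from omegaconf import OmegaConf

recipe = OmegaConf.create({
    "trainer": {"num_nodes": 2},
    "model": {"model_type": "llama_v3"},
})

print(recipe.trainer.num_nodes)       # 2 — attribute-style access
print(recipe["model"]["model_type"])  # llama_v3 — key-style access
print("trainer" in recipe)            # True — the membership check the tests assert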

tests/unit/test_pytorch.py

Lines changed: 57 additions & 3 deletions
@@ -898,7 +898,25 @@ def test_training_recipe_for_cpu(sagemaker_session):
         ("hf_mixtral_8x7b_seq8k_gpu_p5x16_pretrain", "mixtral"),
     ],
 )
-def test_training_recipe_for_gpu(sagemaker_session, recipe, model):
+@patch("sagemaker.pytorch.estimator.PyTorch._recipe_load")
+@patch("sagemaker.pytorch.estimator._get_training_recipe_gpu_script")
+def test_training_recipe_for_gpu(mock_gpu_script, mock_recipe_load, sagemaker_session, recipe, model):
+    from omegaconf import OmegaConf
+
+    # Mock the GPU script function to return the expected entry point
+    mock_gpu_script.return_value = f"{model}_pretrain.py"
+
+    # Mock the recipe structure that would be loaded
+    mock_recipe = OmegaConf.create({
+        "trainer": {
+            "num_nodes": 1,
+        },
+        "model": {
+            "model_type": model,
+        },
+    })
+    mock_recipe_load.return_value = (recipe, mock_recipe)
+
     container_log_level = '"logging.INFO"'
 
     recipe_overrides = {
@@ -946,7 +964,25 @@ def test_training_recipe_for_gpu(sagemaker_session, recipe, model):
     assert pytorch.distribution.items() == expected_distribution.items()
 
 
-def test_training_recipe_with_override(sagemaker_session):
+@patch("sagemaker.pytorch.estimator.PyTorch._recipe_load")
+@patch("sagemaker.pytorch.estimator._get_training_recipe_gpu_script")
+def test_training_recipe_with_override(mock_gpu_script, mock_recipe_load, sagemaker_session):
+    from omegaconf import OmegaConf
+
+    # Mock the GPU script function to return the expected entry point
+    mock_gpu_script.return_value = "mistral_pretrain.py"
+
+    # Mock the recipe structure that would be loaded
+    mock_recipe = OmegaConf.create({
+        "trainer": {
+            "num_nodes": 1,
+        },
+        "model": {
+            "model_type": "mistral",
+        },
+    })
+    mock_recipe_load.return_value = ("hf_llama3_8b_seq8k_gpu_p5x16_pretrain", mock_recipe)
+
     container_log_level = '"logging.INFO"'
 
     recipe_overrides = {
@@ -983,7 +1019,25 @@ def test_training_recipe_with_override(sagemaker_session):
     assert pytorch.image_uri == IMAGE_URI
 
 
-def test_training_recipe_gpu_custom_source_dir(sagemaker_session):
+@patch("sagemaker.pytorch.estimator.PyTorch._recipe_load")
+@patch("sagemaker.pytorch.estimator._get_training_recipe_gpu_script")
+def test_training_recipe_gpu_custom_source_dir(mock_gpu_script, mock_recipe_load, sagemaker_session):
+    from omegaconf import OmegaConf
+
+    # Mock the GPU script function to return the expected entry point
+    mock_gpu_script.return_value = "mistral_pretrain.py"
+
+    # Mock the recipe structure that would be loaded
+    mock_recipe = OmegaConf.create({
+        "trainer": {
+            "num_nodes": 1,
+        },
+        "model": {
+            "model_type": "mistral",
+        },
+    })
+    mock_recipe_load.return_value = ("hf_llama3_8b_seq8k_gpu_p5x16_pretrain", mock_recipe)
+
     container_log_level = '"logging.INFO"'
 
     recipe_overrides = {
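Note the mock argument order in these signatures: stacked @patch decorators apply bottom-up, so the decorator nearest the function (_get_training_recipe_gpu_script) supplies the first mock parameter and the outer one (PyTorch._recipe_load) the second. A minimal, runnable sketch of the rule using stdlib targets:

from unittest.mock import patch

@patch("shutil.copy")      # outermost -> second mock argument
@patch("os.path.isfile")   # innermost -> first mock argument
def test_ordering(mock_isfile, mock_copy):
    import os, shutil
    mock_isfile.return_value = True
    assert os.path.isfile("anything.yaml")  # answered by the mock
    shutil.copy("a", "b")                   # intercepted; touches nothing
    mock_copy.assert_called_once_with("a", "b")

test_ordering()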
