Commit 106117d

Test PR to Fix and Trigger V2 tests (#5388)
* Fix PR check
* Fix two failed unit tests with proper mocking
* Add back the restored workflow files
* Retrigger tests

Co-authored-by: jzhaoqwa <[email protected]>

1 parent: 8845681 · Commit: 106117d

File tree

5 files changed: +101 −16 lines

doc/doc_utils/jumpstart_doc_utils.py

Lines changed: 3 additions & 2 deletions
@@ -12,6 +12,7 @@
 # language governing permissions and limitations under the License.
 from __future__ import absolute_import
 from urllib import request
+from urllib.parse import quote
 import json
 from packaging.version import Version
 from enum import Enum
@@ -170,11 +171,11 @@ def get_proprietary_sdk_manifest():
 
 
 def get_jumpstart_sdk_spec(s3_key: str):
-    return get_public_s3_json_object(f"{JUMPSTART_BUCKET_BASE_URL}/{s3_key}")
+    return get_public_s3_json_object(f"{JUMPSTART_BUCKET_BASE_URL}/{quote(s3_key)}")
 
 
 def get_proprietary_sdk_spec(s3_key: str):
-    return get_public_s3_json_object(f"{PROPRIETARY_DOC_BUCKET}/{s3_key}")
+    return get_public_s3_json_object(f"{PROPRIETARY_DOC_BUCKET}/{quote(s3_key)}")
 
 
 def get_model_task(id):
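The fix wraps each S3 key in urllib.parse.quote before interpolating it into the request URL, so keys containing spaces or other URL-reserved characters produce a valid request instead of a malformed one. A minimal sketch of the difference (the base URL and key below are made-up examples, not values from this codebase):

from urllib.parse import quote

# Hypothetical stand-ins for JUMPSTART_BUCKET_BASE_URL and a real S3 key.
BASE_URL = "https://example-bucket.s3.amazonaws.com"
s3_key = "proprietary-models/Example Model+v1/spec.json"

print(f"{BASE_URL}/{s3_key}")
# .../proprietary-models/Example Model+v1/spec.json   <- space and '+' break the URL
print(f"{BASE_URL}/{quote(s3_key)}")
# .../proprietary-models/Example%20Model%2Bv1/spec.json

By default quote() leaves "/" unescaped, so multi-segment keys like the one above keep their path structure.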

src/sagemaker/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -63,7 +63,7 @@
 from sagemaker.automl.automl import AutoML, AutoMLJob, AutoMLInput  # noqa: F401
 from sagemaker.automl.candidate_estimator import CandidateEstimator, CandidateStep  # noqa: F401
 from sagemaker.automl.automlv2 import (  # noqa: F401
-    AutoMLV2,
+    AutoMLV2,
     AutoMLJobV2,
     LocalAutoMLDataChannel,
     AutoMLDataChannel,

(The removed and added lines render identically here; the change is whitespace-only.)

tests/unit/sagemaker/modules/train/sm_recipes/test_utils.py

Lines changed: 30 additions & 9 deletions
@@ -109,15 +109,36 @@ def test_load_base_recipe_types(
         assert "trainer" in load_recipe
 
     if recipe_type == "sagemaker":
-        mock_clone.side_effect = _run_clone_command_silent
-        load_recipe = _load_base_recipe(
-            training_recipe="training/llama/p4_hf_llama3_70b_seq8k_gpu",
-            recipe_overrides=None,
-            training_recipes_cfg=training_recipes_cfg,
-        )
-        assert load_recipe is not None
-        assert "trainer" in load_recipe
-        assert mock_clone.call_args.args[0] == training_recipes_cfg.get("launcher_repo")
+        # Mock the file check to simulate recipe exists
+        with patch("os.path.isfile", return_value=True), \
+                patch("shutil.copy") as mock_copy:
+            # Create a temporary recipe file for the copy operation
+            import tempfile
+            import os
+            with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f:
+                yaml.dump({"trainer": {"num_nodes": 1}}, f)
+                temp_path = f.name
+
+            def copy_side_effect(src, dst):
+                # Read from temp file and write to destination
+                with open(temp_path, 'r') as src_file:
+                    content = src_file.read()
+                with open(dst, 'w') as dst_file:
+                    dst_file.write(content)
+
+            mock_copy.side_effect = copy_side_effect
+
+            load_recipe = _load_base_recipe(
+                training_recipe="training/llama/p4_hf_llama3_70b_seq8k_gpu",
+                recipe_overrides=None,
+                training_recipes_cfg=training_recipes_cfg,
+            )
+            assert load_recipe is not None
+            assert "trainer" in load_recipe
+            assert mock_clone.call_args.args[0] == training_recipes_cfg.get("launcher_repo")
+
+            # Clean up
+            os.unlink(temp_path)
 
     if recipe_type == "url":
         url = "https://raw.githubusercontent.com/aws-neuron/neuronx-distributed-training/refs/heads/main/examples/conf/hf_llama3_8B_config.yaml"  # noqa

tests/unit/sagemaker/modules/train/test_model_trainer.py

Lines changed: 10 additions & 1 deletion
@@ -931,7 +931,16 @@ def mock_upload_data(path, bucket, key_prefix):
     )
 
 
-def test_model_trainer_gpu_recipe_full_init(modules_session):
+@patch("sagemaker.modules.train.model_trainer._load_base_recipe")
+def test_model_trainer_gpu_recipe_full_init(mock_load_recipe, modules_session):
+    from omegaconf import OmegaConf
+
+    # Mock the recipe loading to return a valid GPU recipe structure
+    mock_load_recipe.return_value = OmegaConf.create({
+        "trainer": {"num_nodes": 2},
+        "model": {"model_type": "llama_v3"},
+    })
+
     training_recipe = "training/llama/p4_hf_llama3_70b_seq8k_gpu"
     recipe_overrides = {"run": {"results_dir": "/opt/ml/model"}}
     compute = Compute(instance_type="ml.p4d.24xlarge", instance_count="2")
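Returning an OmegaConf object rather than a plain dict keeps the mock faithful to what _load_base_recipe presumably produces, since a DictConfig supports both attribute- and key-style access. A quick illustration using the same values as the test above:

from omegaconf import OmegaConf

recipe = OmegaConf.create({
    "trainer": {"num_nodes": 2},
    "model": {"model_type": "llama_v3"},
})

print(recipe.trainer.num_nodes)       # 2 — attribute-style access
print(recipe["model"]["model_type"])  # llama_v3 — key-style access
print("trainer" in recipe)            # True — the membership check the tests assert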

tests/unit/test_pytorch.py

Lines changed: 57 additions & 3 deletions
@@ -898,7 +898,25 @@ def test_training_recipe_for_cpu(sagemaker_session):
         ("hf_mixtral_8x7b_seq8k_gpu_p5x16_pretrain", "mixtral"),
     ],
 )
-def test_training_recipe_for_gpu(sagemaker_session, recipe, model):
+@patch("sagemaker.pytorch.estimator.PyTorch._recipe_load")
+@patch("sagemaker.pytorch.estimator._get_training_recipe_gpu_script")
+def test_training_recipe_for_gpu(mock_gpu_script, mock_recipe_load, sagemaker_session, recipe, model):
+    from omegaconf import OmegaConf
+
+    # Mock the GPU script function to return the expected entry point
+    mock_gpu_script.return_value = f"{model}_pretrain.py"
+
+    # Mock the recipe structure that would be loaded
+    mock_recipe = OmegaConf.create({
+        "trainer": {
+            "num_nodes": 1,
+        },
+        "model": {
+            "model_type": model,
+        },
+    })
+    mock_recipe_load.return_value = (recipe, mock_recipe)
+
     container_log_level = '"logging.INFO"'
 
     recipe_overrides = {
@@ -946,7 +964,25 @@ def test_training_recipe_for_gpu(sagemaker_session, recipe, model):
     assert pytorch.distribution.items() == expected_distribution.items()
 
 
-def test_training_recipe_with_override(sagemaker_session):
+@patch("sagemaker.pytorch.estimator.PyTorch._recipe_load")
+@patch("sagemaker.pytorch.estimator._get_training_recipe_gpu_script")
+def test_training_recipe_with_override(mock_gpu_script, mock_recipe_load, sagemaker_session):
+    from omegaconf import OmegaConf
+
+    # Mock the GPU script function to return the expected entry point
+    mock_gpu_script.return_value = "mistral_pretrain.py"
+
+    # Mock the recipe structure that would be loaded
+    mock_recipe = OmegaConf.create({
+        "trainer": {
+            "num_nodes": 1,
+        },
+        "model": {
+            "model_type": "mistral",
+        },
+    })
+    mock_recipe_load.return_value = ("hf_llama3_8b_seq8k_gpu_p5x16_pretrain", mock_recipe)
+
     container_log_level = '"logging.INFO"'
 
     recipe_overrides = {
@@ -983,7 +1019,25 @@ def test_training_recipe_with_override(sagemaker_session):
     assert pytorch.image_uri == IMAGE_URI
 
 
-def test_training_recipe_gpu_custom_source_dir(sagemaker_session):
+@patch("sagemaker.pytorch.estimator.PyTorch._recipe_load")
+@patch("sagemaker.pytorch.estimator._get_training_recipe_gpu_script")
+def test_training_recipe_gpu_custom_source_dir(mock_gpu_script, mock_recipe_load, sagemaker_session):
+    from omegaconf import OmegaConf
+
+    # Mock the GPU script function to return the expected entry point
+    mock_gpu_script.return_value = "mistral_pretrain.py"
+
+    # Mock the recipe structure that would be loaded
+    mock_recipe = OmegaConf.create({
+        "trainer": {
+            "num_nodes": 1,
+        },
+        "model": {
+            "model_type": "mistral",
+        },
+    })
+    mock_recipe_load.return_value = ("hf_llama3_8b_seq8k_gpu_p5x16_pretrain", mock_recipe)
+
     container_log_level = '"logging.INFO"'
 
     recipe_overrides = {
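Note the mock argument order in these signatures: stacked @patch decorators apply bottom-up, so the decorator nearest the function (_get_training_recipe_gpu_script) supplies the first mock parameter and the outer one (PyTorch._recipe_load) the second. A minimal, runnable sketch of the rule using stdlib targets:

from unittest.mock import patch

@patch("shutil.copy")      # outermost -> second mock argument
@patch("os.path.isfile")   # innermost -> first mock argument
def test_ordering(mock_isfile, mock_copy):
    import os, shutil
    mock_isfile.return_value = True
    assert os.path.isfile("anything.yaml")  # answered by the mock
    shutil.copy("a", "b")                   # intercepted; touches nothing
    mock_copy.assert_called_once_with("a", "b")

test_ordering()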
