From 3099e13e03cd140fb142d9a7b65cec283d79ea53 Mon Sep 17 00:00:00 2001
From: jzhaoqwa <52220743+zhaoqizqwang@users.noreply.github.com>
Date: Tue, 9 Dec 2025 16:32:24 -0800
Subject: [PATCH 1/7] test pr check: remove CodeBuild, CodeQL and security-monitoring workflows

---
 .github/workflows/codebuild-canaries.yml  |  24 -----
 .github/workflows/codebuild-ci-health.yml |  84 ---------------
 .github/workflows/codebuild-ci.yml        |  97 -----------------
 .github/workflows/codeql.yml              |  35 -------
 .github/workflows/security-monitoring.yml | 121 ----------------------
 5 files changed, 361 deletions(-)
 delete mode 100644 .github/workflows/codebuild-canaries.yml
 delete mode 100644 .github/workflows/codebuild-ci-health.yml
 delete mode 100644 .github/workflows/codebuild-ci.yml
 delete mode 100644 .github/workflows/codeql.yml
 delete mode 100644 .github/workflows/security-monitoring.yml

diff --git a/.github/workflows/codebuild-canaries.yml b/.github/workflows/codebuild-canaries.yml
deleted file mode 100644
index a6b5a978ef..0000000000
--- a/.github/workflows/codebuild-canaries.yml
+++ /dev/null
@@ -1,24 +0,0 @@
-name: Canaries
-on:
-  schedule:
-    - cron: "0 */3 * * *"
-  workflow_dispatch:
-
-permissions:
-    id-token: write # This is required for requesting the JWT
-
-jobs:
-  tests:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Configure AWS Credentials
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          role-to-assume: ${{ secrets.CI_AWS_ROLE_ARN }}
-          aws-region: us-west-2
-          role-duration-seconds: 10800
-      - name: Run Integ Tests
-        uses: aws-actions/aws-codebuild-run-build@v1
-        id: codebuild
-        with:
-          project-name: sagemaker-python-sdk-canaries
diff --git a/.github/workflows/codebuild-ci-health.yml b/.github/workflows/codebuild-ci-health.yml
deleted file mode 100644
index 119b9dbe9c..0000000000
--- a/.github/workflows/codebuild-ci-health.yml
+++ /dev/null
@@ -1,84 +0,0 @@
-name: CI Health
-on:
-  schedule:
-    - cron: "0 */3 * * *"
-  workflow_dispatch:
-    
-permissions:
-    id-token: write # This is required for requesting the JWT
-
-jobs:
-  codestyle-doc-tests:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Configure AWS Credentials
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          role-to-assume: ${{ secrets.CI_AWS_ROLE_ARN }}
-          aws-region: us-west-2
-          role-duration-seconds: 10800
-      - name: Run Codestyle & Doc Tests
-        uses: aws-actions/aws-codebuild-run-build@v1
-        with:
-          project-name: sagemaker-python-sdk-ci-health-codestyle-doc-tests
-  unit-tests:
-    runs-on: ubuntu-latest
-    strategy:
-        fail-fast: false
-        matrix:
-          python-version: ["py39", "py310", "py311","py312"]
-    steps:
-      - name: Configure AWS Credentials
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          role-to-assume: ${{ secrets.CI_AWS_ROLE_ARN }}
-          aws-region: us-west-2
-          role-duration-seconds: 10800
-      - name: Run Unit Tests
-        uses: aws-actions/aws-codebuild-run-build@v1
-        with:
-          project-name: sagemaker-python-sdk-ci-health-unit-tests
-          env-vars-for-codebuild: |
-            PY_VERSION
-        env:
-          PY_VERSION: ${{ matrix.python-version }}
-  integ-tests:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Configure AWS Credentials
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          role-to-assume: ${{ secrets.CI_AWS_ROLE_ARN }}
-          aws-region: us-west-2
-          role-duration-seconds: 10800
-      - name: Run Integ Tests
-        uses: aws-actions/aws-codebuild-run-build@v1
-        id: codebuild
-        with:
-          project-name: sagemaker-python-sdk-ci-health-integ-tests
-  slow-tests:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Configure AWS Credentials
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          role-to-assume: ${{ secrets.CI_AWS_ROLE_ARN }}
-          aws-region: us-west-2
-          role-duration-seconds: 10800
-      - name: Run Slow Tests
-        uses: aws-actions/aws-codebuild-run-build@v1
-        with:
-          project-name: sagemaker-python-sdk-ci-health-slow-tests
-  localmode-tests:
-      runs-on: ubuntu-latest
-      steps:
-        - name: Configure AWS Credentials
-          uses: aws-actions/configure-aws-credentials@v4
-          with:
-            role-to-assume: ${{ secrets.CI_AWS_ROLE_ARN }}
-            aws-region: us-west-2
-            role-duration-seconds: 10800
-        - name: Run Local Mode Tests
-          uses: aws-actions/aws-codebuild-run-build@v1
-          with:
-            project-name: sagemaker-python-sdk-ci-health-localmode-tests
\ No newline at end of file
diff --git a/.github/workflows/codebuild-ci.yml b/.github/workflows/codebuild-ci.yml
deleted file mode 100644
index 2f9f6aa618..0000000000
--- a/.github/workflows/codebuild-ci.yml
+++ /dev/null
@@ -1,97 +0,0 @@
-name: PR Checks
-on:
-    pull_request_target:
-        branches:
-            - "master-v2"
-
-concurrency:
-    group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.head_ref }}
-    cancel-in-progress: true
-
-permissions:
-    id-token: write # This is required for requesting the JWT
-
-jobs:
-  collab-check:
-    runs-on: ubuntu-latest
-    outputs: 
-      approval-env: ${{ steps.collab-check.outputs.result }}
-    steps:
-      - name: Collaborator Check
-        uses: actions/github-script@v7
-        id: collab-check
-        with:
-          github-token: ${{ secrets.COLLAB_CHECK_TOKEN }}
-          result-encoding: string
-          script: | 
-            try {
-              const res = await github.rest.repos.checkCollaborator({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                username: "${{ github.event.pull_request.user.login }}",
-              });
-              console.log("Verifed ${{ github.event.pull_request.user.login }} is a repo collaborator. Auto Approving PR Checks.")
-              return res.status == "204" ? "auto-approve" : "manual-approval"
-            } catch (error) {
-              console.log("${{ github.event.pull_request.user.login }} is not a collaborator. Requiring Manual Approval to run PR Checks.")
-              return "manual-approval"
-            }
-  wait-for-approval:
-    runs-on: ubuntu-latest
-    needs: [collab-check]
-    environment: ${{ needs.collab-check.outputs.approval-env }}
-    steps:
-      - run: echo "Workflow Approved! Starting PR Checks."
-  codestyle-doc-tests:
-    runs-on: ubuntu-latest
-    needs: [wait-for-approval]
-    steps: 
-      - name: Configure AWS Credentials
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          role-to-assume: ${{ secrets.CI_AWS_ROLE_ARN }}
-          aws-region: us-west-2
-          role-duration-seconds: 10800
-      - name: Run Codestyle & Doc Tests
-        uses: aws-actions/aws-codebuild-run-build@v1
-        with:
-          project-name: ${{ github.event.repository.name }}-ci-codestyle-doc-tests
-          source-version-override: 'refs/pull/${{ github.event.pull_request.number }}/head^{${{ github.event.pull_request.head.sha }}}'
-  unit-tests:
-    runs-on: ubuntu-latest
-    needs: [wait-for-approval]
-    strategy:
-        fail-fast: false
-        matrix:
-          python-version: ["py39","py310","py311","py312"]
-    steps:
-      - name: Configure AWS Credentials
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          role-to-assume: ${{ secrets.CI_AWS_ROLE_ARN }}
-          aws-region: us-west-2
-          role-duration-seconds: 10800
-      - name: Run Unit Tests
-        uses: aws-actions/aws-codebuild-run-build@v1
-        with:
-          project-name: ${{ github.event.repository.name }}-ci-unit-tests
-          source-version-override: 'refs/pull/${{ github.event.pull_request.number }}/head^{${{ github.event.pull_request.head.sha }}}'
-          env-vars-for-codebuild: |
-            PY_VERSION
-        env:
-          PY_VERSION: ${{ matrix.python-version }}
-  integ-tests:
-    runs-on: ubuntu-latest
-    needs: [wait-for-approval]
-    steps: 
-      - name: Configure AWS Credentials
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          role-to-assume: ${{ secrets.CI_AWS_ROLE_ARN }}
-          aws-region: us-west-2
-          role-duration-seconds: 10800
-      - name: Run Integ Tests
-        uses: aws-actions/aws-codebuild-run-build@v1
-        with:
-          project-name: ${{ github.event.repository.name }}-ci-integ-tests
-          source-version-override: 'refs/pull/${{ github.event.pull_request.number }}/head^{${{ github.event.pull_request.head.sha }}}'
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
deleted file mode 100644
index 8fbf42803b..0000000000
--- a/.github/workflows/codeql.yml
+++ /dev/null
@@ -1,35 +0,0 @@
-name: "CodeQL"
-on:
-  push:
-    branches: [ "master" ]
-  pull_request:
-    branches: [ "master" ]
-  schedule:
-    - cron: '30 15 * * *'
-jobs:
-  analyze:
-    name: Analyze (${{ matrix.language }})
-    runs-on: ${{ 'ubuntu-latest' }}
-    permissions:
-      security-events: write
-      packages: read
-
-    strategy:
-      matrix:
-        include:
-          - language: python
-            build-mode: none
-          - language: java-kotlin
-            build-mode: none
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@6ccd57f4c5d15bdc2fef309bd9fb6cc9db2ef1c6
-      - name: Initialize CodeQL
-        uses: github/codeql-action/init@4b1d7da102ff94aca014c0245062b1a463356d72
-        with:
-          languages: ${{ matrix.language }}
-          build-mode: ${{ matrix.build-mode }}
-      - name: Perform CodeQL Analysis
-        uses: github/codeql-action/analyze@4b1d7da102ff94aca014c0245062b1a463356d72
-        with:
-          category: "/language:${{matrix.language}}"
diff --git a/.github/workflows/security-monitoring.yml b/.github/workflows/security-monitoring.yml
deleted file mode 100644
index ecce0643e6..0000000000
--- a/.github/workflows/security-monitoring.yml
+++ /dev/null
@@ -1,121 +0,0 @@
-name: Security Monitoring
-
-on:
-  schedule:
-    - cron: '0 16 * * *'
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.run_id }}
-  cancel-in-progress: true
-
-permissions:
-  id-token: write
-
-jobs:
-  check-code-scanning-alerts:
-    runs-on: ubuntu-latest
-    outputs:
-      code_scanning_alert_status: ${{ steps.check-code-scanning-alerts.outputs.code_scanning_alert_status }}
-    steps:
-      - name: Check for security alerts
-        id: check-code-scanning-alerts
-        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea
-        with:
-          github-token: ${{ secrets.GH_PAT }}
-          script: |
-            async function checkAlerts() {
-              const owner = '${{ github.repository_owner }}';
-              const repo = '${{ github.event.repository.name }}';
-              const ref = 'refs/heads/master';
-            
-              const codeScanningAlerts = await github.rest.codeScanning.listAlertsForRepo({
-                owner,
-                repo,
-                ref: ref
-              });
-              const activeCodeScanningAlerts = codeScanningAlerts.data.filter(alert => alert.state === 'open');
-              core.setOutput('code_scanning_alert_status', activeCodeScanningAlerts.length > 0 ? '1': '0');
-            }
-            await checkAlerts();
-
-  check-dependabot-alerts:
-    runs-on: ubuntu-latest
-    outputs:
-      dependabot_alert_status: ${{ steps.check-dependabot-alerts.outputs.dependabot_alert_status }}
-    steps:
-      - name: Check for dependabot alerts
-        id: check-dependabot-alerts
-        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea
-        with:
-          github-token: ${{ secrets.GH_PAT }}
-          script: |
-            async function checkAlerts() {
-              const owner = '${{ github.repository_owner }}';
-              const repo = '${{ github.event.repository.name }}';
-            
-              const dependabotAlerts = await github.rest.dependabot.listAlertsForRepo({
-                owner,
-                repo,
-                headers: {
-                  'accept': 'applications/vnd.github+json'
-                }
-              }); 
-              const activeDependabotAlerts = dependabotAlerts.data.filter(alert => alert.state === 'open');
-              core.setOutput('dependabot_alert_status', activeDependabotAlerts.length > 0 ? '1': '0');
-            }
-            await checkAlerts();
-
-  check-secret-scanning-alerts:
-    runs-on: ubuntu-latest
-    outputs:
-      secret_scanning_alert_status: ${{ steps.check-secret-scanning-alerts.outputs.secret_scanning_alert_status }}
-    steps:
-      - name: Check for secret scanning alerts
-        id: check-secret-scanning-alerts
-        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea
-        with:
-          github-token: ${{ secrets.GH_PAT }}
-          script: |
-            async function checkAlerts() {
-              const owner = '${{ github.repository_owner }}';
-              const repo = '${{ github.event.repository.name }}';
-
-              const secretScanningAlerts = await github.rest.secretScanning.listAlertsForRepo({
-                owner,
-                repo,
-              }); 
-              const activeSecretScanningAlerts = secretScanningAlerts.data.filter(alert => alert.state === 'open');
-              core.setOutput('secret_scanning_alert_status', activeSecretScanningAlerts.length > 0 ? '1': '0');
-            }
-            await checkAlerts();
-
-  put-metric-data:
-    runs-on: ubuntu-latest
-    needs: [check-code-scanning-alerts, check-dependabot-alerts, check-secret-scanning-alerts]
-    steps:
-      - name: Configure AWS Credentials
-        uses: aws-actions/configure-aws-credentials@12e3392609eaaceb7ae6191b3f54bbcb85b5002b
-        with:
-          role-to-assume: ${{ secrets.MONITORING_ROLE_ARN }}
-          aws-region: us-west-2
-      - name: Put Code Scanning Alert Metric Data
-        run: |
-          if [ "${{ needs.check-code-scanning-alerts.outputs.code_scanning_alert_status }}" == "1" ]; then
-            aws cloudwatch put-metric-data --metric-name CodeScanningAlert --namespace SecurityMonitoringMetrics --value 1 --unit Count --dimensions ProjectName=sagemaker-python-sdk
-          else
-            aws cloudwatch put-metric-data --metric-name CodeScanningAlert --namespace SecurityMonitoringMetrics --value 0 --unit Count --dimensions ProjectName=sagemaker-python-sdk
-          fi
-      - name: Put Dependabot Alert Metric Data
-        run: |
-          if [ "${{ needs.check-dependabot-alerts.outputs.dependabot_alert_status }}" == "1" ]; then
-            aws cloudwatch put-metric-data --metric-name DependabotAlert --namespace SecurityMonitoringMetrics --value 1 --unit Count --dimensions ProjectName=sagemaker-python-sdk
-          else
-            aws cloudwatch put-metric-data --metric-name DependabotAlert --namespace SecurityMonitoringMetrics --value 0 --unit Count --dimensions ProjectName=sagemaker-python-sdk
-          fi
-      - name: Put Secret Scanning Alert Metric Data
-        run: |
-          if [ "${{ needs.check-secret-scanning-alerts.outputs.secret_scanning_alert_status }}" == "1" ]; then
-            aws cloudwatch put-metric-data --metric-name SecretScanningAlert --namespace SecurityMonitoringMetrics --value 1 --unit Count --dimensions ProjectName=sagemaker-python-sdk
-          else
-            aws cloudwatch put-metric-data --metric-name SecretScanningAlert --namespace SecurityMonitoringMetrics --value 0 --unit Count --dimensions ProjectName=sagemaker-python-sdk
-          fi

From 6235d7e4c5d9ec90ec48669e19dc950f7d555d93 Mon Sep 17 00:00:00 2001
From: jzhaoqwa <52220743+zhaoqizqwang@users.noreply.github.com>
Date: Tue, 9 Dec 2025 17:40:13 -0800
Subject: [PATCH 2/7] reformat files

---
 src/sagemaker/__init__.py                     |  2 +-
 .../modules/train/sm_recipes/test_utils.py    | 18 ++---
 .../modules/train/test_model_trainer.py       | 14 ++--
 tests/unit/test_pytorch.py                    | 80 +++++++++++--------
 4 files changed, 63 insertions(+), 51 deletions(-)

diff --git a/src/sagemaker/__init__.py b/src/sagemaker/__init__.py
index 0167b7139a..6a3f43f66c 100644
--- a/src/sagemaker/__init__.py
+++ b/src/sagemaker/__init__.py
@@ -63,7 +63,7 @@
 from sagemaker.automl.automl import AutoML, AutoMLJob, AutoMLInput  # noqa: F401
 from sagemaker.automl.candidate_estimator import CandidateEstimator, CandidateStep  # noqa: F401
 from sagemaker.automl.automlv2 import (  # noqa: F401
-    AutoMLV2, 
+    AutoMLV2,
     AutoMLJobV2,
     LocalAutoMLDataChannel,
     AutoMLDataChannel,
diff --git a/tests/unit/sagemaker/modules/train/sm_recipes/test_utils.py b/tests/unit/sagemaker/modules/train/sm_recipes/test_utils.py
index db0fb1c279..18ef583d3c 100644
--- a/tests/unit/sagemaker/modules/train/sm_recipes/test_utils.py
+++ b/tests/unit/sagemaker/modules/train/sm_recipes/test_utils.py
@@ -110,24 +110,24 @@ def test_load_base_recipe_types(
 
     if recipe_type == "sagemaker":
         # Mock the file check to simulate recipe exists
-        with patch("os.path.isfile", return_value=True), \
-             patch("shutil.copy") as mock_copy:
+        with patch("os.path.isfile", return_value=True), patch("shutil.copy") as mock_copy:
             # Create a temporary recipe file for the copy operation
             import tempfile
             import os
-            with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f:
+
+            with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
                 yaml.dump({"trainer": {"num_nodes": 1}}, f)
                 temp_path = f.name
-            
+
             def copy_side_effect(src, dst):
                 # Read from temp file and write to destination
-                with open(temp_path, 'r') as src_file:
+                with open(temp_path, "r") as src_file:
                     content = src_file.read()
-                with open(dst, 'w') as dst_file:
+                with open(dst, "w") as dst_file:
                     dst_file.write(content)
-            
+
             mock_copy.side_effect = copy_side_effect
-            
+
             load_recipe = _load_base_recipe(
                 training_recipe="training/llama/p4_hf_llama3_70b_seq8k_gpu",
                 recipe_overrides=None,
@@ -136,7 +136,7 @@ def copy_side_effect(src, dst):
             assert load_recipe is not None
             assert "trainer" in load_recipe
             assert mock_clone.call_args.args[0] == training_recipes_cfg.get("launcher_repo")
-            
+
             # Clean up
             os.unlink(temp_path)
 
diff --git a/tests/unit/sagemaker/modules/train/test_model_trainer.py b/tests/unit/sagemaker/modules/train/test_model_trainer.py
index 6431f96502..367ab2a84e 100644
--- a/tests/unit/sagemaker/modules/train/test_model_trainer.py
+++ b/tests/unit/sagemaker/modules/train/test_model_trainer.py
@@ -934,13 +934,15 @@ def mock_upload_data(path, bucket, key_prefix):
 @patch("sagemaker.modules.train.model_trainer._load_base_recipe")
 def test_model_trainer_gpu_recipe_full_init(mock_load_recipe, modules_session):
     from omegaconf import OmegaConf
-    
+
     # Mock the recipe loading to return a valid GPU recipe structure
-    mock_load_recipe.return_value = OmegaConf.create({
-        "trainer": {"num_nodes": 2},
-        "model": {"model_type": "llama_v3"},
-    })
-    
+    mock_load_recipe.return_value = OmegaConf.create(
+        {
+            "trainer": {"num_nodes": 2},
+            "model": {"model_type": "llama_v3"},
+        }
+    )
+
     training_recipe = "training/llama/p4_hf_llama3_70b_seq8k_gpu"
     recipe_overrides = {"run": {"results_dir": "/opt/ml/model"}}
     compute = Compute(instance_type="ml.p4d.24xlarge", instance_count="2")
diff --git a/tests/unit/test_pytorch.py b/tests/unit/test_pytorch.py
index 2671cdbcd6..f5239a0dd6 100644
--- a/tests/unit/test_pytorch.py
+++ b/tests/unit/test_pytorch.py
@@ -900,23 +900,27 @@ def test_training_recipe_for_cpu(sagemaker_session):
 )
 @patch("sagemaker.pytorch.estimator.PyTorch._recipe_load")
 @patch("sagemaker.pytorch.estimator._get_training_recipe_gpu_script")
-def test_training_recipe_for_gpu(mock_gpu_script, mock_recipe_load, sagemaker_session, recipe, model):
+def test_training_recipe_for_gpu(
+    mock_gpu_script, mock_recipe_load, sagemaker_session, recipe, model
+):
     from omegaconf import OmegaConf
-    
+
     # Mock the GPU script function to return the expected entry point
     mock_gpu_script.return_value = f"{model}_pretrain.py"
-    
+
     # Mock the recipe structure that would be loaded
-    mock_recipe = OmegaConf.create({
-        "trainer": {
-            "num_nodes": 1,
-        },
-        "model": {
-            "model_type": model,
-        },
-    })
+    mock_recipe = OmegaConf.create(
+        {
+            "trainer": {
+                "num_nodes": 1,
+            },
+            "model": {
+                "model_type": model,
+            },
+        }
+    )
     mock_recipe_load.return_value = (recipe, mock_recipe)
-    
+
     container_log_level = '"logging.INFO"'
 
     recipe_overrides = {
@@ -968,21 +972,23 @@ def test_training_recipe_for_gpu(mock_gpu_script, mock_recipe_load, sagemaker_se
 @patch("sagemaker.pytorch.estimator._get_training_recipe_gpu_script")
 def test_training_recipe_with_override(mock_gpu_script, mock_recipe_load, sagemaker_session):
     from omegaconf import OmegaConf
-    
+
     # Mock the GPU script function to return the expected entry point
     mock_gpu_script.return_value = "mistral_pretrain.py"
-    
+
     # Mock the recipe structure that would be loaded
-    mock_recipe = OmegaConf.create({
-        "trainer": {
-            "num_nodes": 1,
-        },
-        "model": {
-            "model_type": "mistral",
-        },
-    })
+    mock_recipe = OmegaConf.create(
+        {
+            "trainer": {
+                "num_nodes": 1,
+            },
+            "model": {
+                "model_type": "mistral",
+            },
+        }
+    )
     mock_recipe_load.return_value = ("hf_llama3_8b_seq8k_gpu_p5x16_pretrain", mock_recipe)
-    
+
     container_log_level = '"logging.INFO"'
 
     recipe_overrides = {
@@ -1021,23 +1027,27 @@ def test_training_recipe_with_override(mock_gpu_script, mock_recipe_load, sagema
 
 @patch("sagemaker.pytorch.estimator.PyTorch._recipe_load")
 @patch("sagemaker.pytorch.estimator._get_training_recipe_gpu_script")
-def test_training_recipe_gpu_custom_source_dir(mock_gpu_script, mock_recipe_load, sagemaker_session):
+def test_training_recipe_gpu_custom_source_dir(
+    mock_gpu_script, mock_recipe_load, sagemaker_session
+):
     from omegaconf import OmegaConf
-    
+
     # Mock the GPU script function to return the expected entry point
     mock_gpu_script.return_value = "mistral_pretrain.py"
-    
+
     # Mock the recipe structure that would be loaded
-    mock_recipe = OmegaConf.create({
-        "trainer": {
-            "num_nodes": 1,
-        },
-        "model": {
-            "model_type": "mistral",
-        },
-    })
+    mock_recipe = OmegaConf.create(
+        {
+            "trainer": {
+                "num_nodes": 1,
+            },
+            "model": {
+                "model_type": "mistral",
+            },
+        }
+    )
     mock_recipe_load.return_value = ("hf_llama3_8b_seq8k_gpu_p5x16_pretrain", mock_recipe)
-    
+
     container_log_level = '"logging.INFO"'
 
     recipe_overrides = {

From dd8ac20e111bd06aa50bc293cede0c62484a8ffa Mon Sep 17 00:00:00 2001
From: jzhaoqwa <52220743+zhaoqizqwang@users.noreply.github.com>
Date: Tue, 9 Dec 2025 18:09:38 -0800
Subject: [PATCH 3/7] Fixing codestyle error

---
 tests/unit/sagemaker/modules/train/sm_recipes/test_utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/unit/sagemaker/modules/train/sm_recipes/test_utils.py b/tests/unit/sagemaker/modules/train/sm_recipes/test_utils.py
index 18ef583d3c..f92751b83b 100644
--- a/tests/unit/sagemaker/modules/train/sm_recipes/test_utils.py
+++ b/tests/unit/sagemaker/modules/train/sm_recipes/test_utils.py
@@ -33,7 +33,6 @@
     _get_args_from_nova_recipe,
     _get_args_from_llmft_recipe,
 )
-from sagemaker.modules.utils import _run_clone_command_silent
 from sagemaker.modules.configs import Compute
 
 

From 533842fcca225c02d3c19015ffb41e514cb9db78 Mon Sep 17 00:00:00 2001
From: jzhaoqwa <52220743+zhaoqizqwang@users.noreply.github.com>
Date: Tue, 9 Dec 2025 18:29:36 -0800
Subject: [PATCH 4/7] fix codestyle: add missing newline at end of test_requirements.txt

---
 requirements/extras/test_requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/extras/test_requirements.txt b/requirements/extras/test_requirements.txt
index ecc805fc47..477103ab1d 100644
--- a/requirements/extras/test_requirements.txt
+++ b/requirements/extras/test_requirements.txt
@@ -62,4 +62,4 @@ mypy-boto3-s3==1.35.76
 mypy-extensions==1.0.0
 mypy==1.9.0
 # apache-airflow transitive dependancy
-google-re2<1.1.20250805; python_version < "3.10"
\ No newline at end of file
+google-re2<1.1.20250805; python_version < "3.10"

From 8b62909d21e469a151e0648f91a77446c0be0e69 Mon Sep 17 00:00:00 2001
From: jzhaoqwa <52220743+zhaoqizqwang@users.noreply.github.com>
Date: Wed, 10 Dec 2025 04:49:55 -0800
Subject: [PATCH 5/7] Update hub cleanup

---
 tests/integ/sagemaker/jumpstart/conftest.py | 45 ++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/tests/integ/sagemaker/jumpstart/conftest.py b/tests/integ/sagemaker/jumpstart/conftest.py
index 260b0f2b22..e6df9bddd3 100644
--- a/tests/integ/sagemaker/jumpstart/conftest.py
+++ b/tests/integ/sagemaker/jumpstart/conftest.py
@@ -52,7 +52,25 @@ def _setup():
     hub = Hub(
         hub_name=os.environ[ENV_VAR_JUMPSTART_SDK_TEST_HUB_NAME], sagemaker_session=get_sm_session()
     )
-    hub.create(description=test_hub_description)
+    
+    # Check if hub already exists before creating
+    try:
+        hub.describe()
+        print(f"Hub {test_hub_name} already exists, reusing it.")
+    except Exception:
+        # Hub doesn't exist, create it
+        try:
+            hub.create(description=test_hub_description)
+            print(f"Created new hub: {test_hub_name}")
+        except Exception as e:
+            if "ResourceLimitExceeded" in str(e):
+                print(f"Hub limit reached. Cleaning up old hubs...")
+                _cleanup_old_hubs(get_sm_session())
+                # Retry creating the hub
+                hub.create(description=test_hub_description)
+                print(f"Created new hub after cleanup: {test_hub_name}")
+            else:
+                raise
 
 
 def _teardown():
@@ -138,6 +156,31 @@ def _teardown():
     _delete_hubs(sagemaker_session, test_hub_name)
 
 
+def _cleanup_old_hubs(sagemaker_session):
+    """Clean up old test hubs to free up resources."""
+    try:
+        response = sagemaker_session.list_hubs()
+        test_hubs = [
+            hub for hub in response.get("HubSummaries", [])
+            if hub["HubName"].startswith(HUB_NAME_PREFIX)
+        ]
+        
+        # Sort by creation time and delete oldest hubs
+        test_hubs.sort(key=lambda x: x.get("CreationTime", ""))
+        
+        # Delete oldest hubs (keep only the most recent 10)
+        hubs_to_delete = test_hubs[:-10] if len(test_hubs) > 10 else test_hubs[:max(0, len(test_hubs) - 40)]
+        
+        for hub in hubs_to_delete:
+            try:
+                print(f"Deleting old hub: {hub['HubName']}")
+                _delete_hubs(sagemaker_session, hub["HubName"])
+            except Exception as e:
+                print(f"Failed to delete hub {hub['HubName']}: {e}")
+    except Exception as e:
+        print(f"Failed to cleanup old hubs: {e}")
+
+
 def _delete_hubs(sagemaker_session, hub_name):
     # list and delete all hub contents first
     list_hub_content_response = sagemaker_session.list_hub_contents(

From 061f07dcd3114b932c598b976b720c393fe78a6a Mon Sep 17 00:00:00 2001
From: jzhaoqwa <52220743+zhaoqizqwang@users.noreply.github.com>
Date: Wed, 10 Dec 2025 04:57:47 -0800
Subject: [PATCH 6/7] reformat file

---
 tests/integ/sagemaker/jumpstart/conftest.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/tests/integ/sagemaker/jumpstart/conftest.py b/tests/integ/sagemaker/jumpstart/conftest.py
index e6df9bddd3..711c84003f 100644
--- a/tests/integ/sagemaker/jumpstart/conftest.py
+++ b/tests/integ/sagemaker/jumpstart/conftest.py
@@ -52,7 +52,7 @@ def _setup():
     hub = Hub(
         hub_name=os.environ[ENV_VAR_JUMPSTART_SDK_TEST_HUB_NAME], sagemaker_session=get_sm_session()
     )
-    
+
     # Check if hub already exists before creating
     try:
         hub.describe()
@@ -161,16 +161,19 @@ def _cleanup_old_hubs(sagemaker_session):
     try:
         response = sagemaker_session.list_hubs()
         test_hubs = [
-            hub for hub in response.get("HubSummaries", [])
+            hub
+            for hub in response.get("HubSummaries", [])
             if hub["HubName"].startswith(HUB_NAME_PREFIX)
         ]
-        
+
         # Sort by creation time and delete oldest hubs
         test_hubs.sort(key=lambda x: x.get("CreationTime", ""))
-        
+
         # Delete oldest hubs (keep only the most recent 10)
-        hubs_to_delete = test_hubs[:-10] if len(test_hubs) > 10 else test_hubs[:max(0, len(test_hubs) - 40)]
-        
+        # Slicing clamps at the list start, so with ten or fewer hubs this is
+        # already empty; no separate small-list branch is needed.
+        hubs_to_delete = test_hubs[:-10]
+
         for hub in hubs_to_delete:
             try:
                 print(f"Deleting old hub: {hub['HubName']}")

From 78065ffe6fb26ef6be499e1c27f9173894840de5 Mon Sep 17 00:00:00 2001
From: jzhaoqwa <52220743+zhaoqizqwang@users.noreply.github.com>
Date: Wed, 10 Dec 2025 05:04:50 -0800
Subject: [PATCH 7/7] fix codestyle: drop f-prefix from placeholder-free string

---
 tests/integ/sagemaker/jumpstart/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integ/sagemaker/jumpstart/conftest.py b/tests/integ/sagemaker/jumpstart/conftest.py
index 711c84003f..51b0fbf703 100644
--- a/tests/integ/sagemaker/jumpstart/conftest.py
+++ b/tests/integ/sagemaker/jumpstart/conftest.py
@@ -64,7 +64,7 @@ def _setup():
             print(f"Created new hub: {test_hub_name}")
         except Exception as e:
             if "ResourceLimitExceeded" in str(e):
-                print(f"Hub limit reached. Cleaning up old hubs...")
+                print("Hub limit reached. Cleaning up old hubs...")
                 _cleanup_old_hubs(get_sm_session())
                 # Retry creating the hub
                 hub.create(description=test_hub_description)