From 3099e13e03cd140fb142d9a7b65cec283d79ea53 Mon Sep 17 00:00:00 2001 From: jzhaoqwa <52220743+zhaoqizqwang@users.noreply.github.com> Date: Tue, 9 Dec 2025 16:32:24 -0800 Subject: [PATCH 1/7] test pr check --- .github/workflows/codebuild-canaries.yml | 24 ----- .github/workflows/codebuild-ci-health.yml | 84 --------------- .github/workflows/codebuild-ci.yml | 97 ----------------- .github/workflows/codeql.yml | 35 ------- .github/workflows/security-monitoring.yml | 121 ---------------------- 5 files changed, 361 deletions(-) delete mode 100644 .github/workflows/codebuild-canaries.yml delete mode 100644 .github/workflows/codebuild-ci-health.yml delete mode 100644 .github/workflows/codebuild-ci.yml delete mode 100644 .github/workflows/codeql.yml delete mode 100644 .github/workflows/security-monitoring.yml diff --git a/.github/workflows/codebuild-canaries.yml b/.github/workflows/codebuild-canaries.yml deleted file mode 100644 index a6b5a978ef..0000000000 --- a/.github/workflows/codebuild-canaries.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: Canaries -on: - schedule: - - cron: "0 */3 * * *" - workflow_dispatch: - -permissions: - id-token: write # This is required for requesting the JWT - -jobs: - tests: - runs-on: ubuntu-latest - steps: - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: ${{ secrets.CI_AWS_ROLE_ARN }} - aws-region: us-west-2 - role-duration-seconds: 10800 - - name: Run Integ Tests - uses: aws-actions/aws-codebuild-run-build@v1 - id: codebuild - with: - project-name: sagemaker-python-sdk-canaries diff --git a/.github/workflows/codebuild-ci-health.yml b/.github/workflows/codebuild-ci-health.yml deleted file mode 100644 index 119b9dbe9c..0000000000 --- a/.github/workflows/codebuild-ci-health.yml +++ /dev/null @@ -1,84 +0,0 @@ -name: CI Health -on: - schedule: - - cron: "0 */3 * * *" - workflow_dispatch: - -permissions: - id-token: write # This is required for requesting the JWT - -jobs: - codestyle-doc-tests: - runs-on: ubuntu-latest - steps: - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: ${{ secrets.CI_AWS_ROLE_ARN }} - aws-region: us-west-2 - role-duration-seconds: 10800 - - name: Run Codestyle & Doc Tests - uses: aws-actions/aws-codebuild-run-build@v1 - with: - project-name: sagemaker-python-sdk-ci-health-codestyle-doc-tests - unit-tests: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: ["py39", "py310", "py311","py312"] - steps: - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: ${{ secrets.CI_AWS_ROLE_ARN }} - aws-region: us-west-2 - role-duration-seconds: 10800 - - name: Run Unit Tests - uses: aws-actions/aws-codebuild-run-build@v1 - with: - project-name: sagemaker-python-sdk-ci-health-unit-tests - env-vars-for-codebuild: | - PY_VERSION - env: - PY_VERSION: ${{ matrix.python-version }} - integ-tests: - runs-on: ubuntu-latest - steps: - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: ${{ secrets.CI_AWS_ROLE_ARN }} - aws-region: us-west-2 - role-duration-seconds: 10800 - - name: Run Integ Tests - uses: aws-actions/aws-codebuild-run-build@v1 - id: codebuild - with: - project-name: sagemaker-python-sdk-ci-health-integ-tests - slow-tests: - runs-on: ubuntu-latest - steps: - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: ${{ secrets.CI_AWS_ROLE_ARN }} - aws-region: us-west-2 - role-duration-seconds: 10800 - - name: Run Slow Tests - uses: aws-actions/aws-codebuild-run-build@v1 - with: - project-name: sagemaker-python-sdk-ci-health-slow-tests - localmode-tests: - runs-on: ubuntu-latest - steps: - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: ${{ secrets.CI_AWS_ROLE_ARN }} - aws-region: us-west-2 - role-duration-seconds: 10800 - - name: Run Local Mode Tests - uses: aws-actions/aws-codebuild-run-build@v1 - with: - project-name: sagemaker-python-sdk-ci-health-localmode-tests \ No newline at end of file diff --git a/.github/workflows/codebuild-ci.yml b/.github/workflows/codebuild-ci.yml deleted file mode 100644 index 2f9f6aa618..0000000000 --- a/.github/workflows/codebuild-ci.yml +++ /dev/null @@ -1,97 +0,0 @@ -name: PR Checks -on: - pull_request_target: - branches: - - "master-v2" - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.head_ref }} - cancel-in-progress: true - -permissions: - id-token: write # This is required for requesting the JWT - -jobs: - collab-check: - runs-on: ubuntu-latest - outputs: - approval-env: ${{ steps.collab-check.outputs.result }} - steps: - - name: Collaborator Check - uses: actions/github-script@v7 - id: collab-check - with: - github-token: ${{ secrets.COLLAB_CHECK_TOKEN }} - result-encoding: string - script: | - try { - const res = await github.rest.repos.checkCollaborator({ - owner: context.repo.owner, - repo: context.repo.repo, - username: "${{ github.event.pull_request.user.login }}", - }); - console.log("Verifed ${{ github.event.pull_request.user.login }} is a repo collaborator. Auto Approving PR Checks.") - return res.status == "204" ? "auto-approve" : "manual-approval" - } catch (error) { - console.log("${{ github.event.pull_request.user.login }} is not a collaborator. Requiring Manual Approval to run PR Checks.") - return "manual-approval" - } - wait-for-approval: - runs-on: ubuntu-latest - needs: [collab-check] - environment: ${{ needs.collab-check.outputs.approval-env }} - steps: - - run: echo "Workflow Approved! Starting PR Checks." - codestyle-doc-tests: - runs-on: ubuntu-latest - needs: [wait-for-approval] - steps: - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: ${{ secrets.CI_AWS_ROLE_ARN }} - aws-region: us-west-2 - role-duration-seconds: 10800 - - name: Run Codestyle & Doc Tests - uses: aws-actions/aws-codebuild-run-build@v1 - with: - project-name: ${{ github.event.repository.name }}-ci-codestyle-doc-tests - source-version-override: 'refs/pull/${{ github.event.pull_request.number }}/head^{${{ github.event.pull_request.head.sha }}}' - unit-tests: - runs-on: ubuntu-latest - needs: [wait-for-approval] - strategy: - fail-fast: false - matrix: - python-version: ["py39","py310","py311","py312"] - steps: - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: ${{ secrets.CI_AWS_ROLE_ARN }} - aws-region: us-west-2 - role-duration-seconds: 10800 - - name: Run Unit Tests - uses: aws-actions/aws-codebuild-run-build@v1 - with: - project-name: ${{ github.event.repository.name }}-ci-unit-tests - source-version-override: 'refs/pull/${{ github.event.pull_request.number }}/head^{${{ github.event.pull_request.head.sha }}}' - env-vars-for-codebuild: | - PY_VERSION - env: - PY_VERSION: ${{ matrix.python-version }} - integ-tests: - runs-on: ubuntu-latest - needs: [wait-for-approval] - steps: - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: ${{ secrets.CI_AWS_ROLE_ARN }} - aws-region: us-west-2 - role-duration-seconds: 10800 - - name: Run Integ Tests - uses: aws-actions/aws-codebuild-run-build@v1 - with: - project-name: ${{ github.event.repository.name }}-ci-integ-tests - source-version-override: 'refs/pull/${{ github.event.pull_request.number }}/head^{${{ github.event.pull_request.head.sha }}}' diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml deleted file mode 100644 index 8fbf42803b..0000000000 --- a/.github/workflows/codeql.yml +++ /dev/null @@ -1,35 +0,0 @@ -name: "CodeQL" -on: - push: - branches: [ "master" ] - pull_request: - branches: [ "master" ] - schedule: - - cron: '30 15 * * *' -jobs: - analyze: - name: Analyze (${{ matrix.language }}) - runs-on: ${{ 'ubuntu-latest' }} - permissions: - security-events: write - packages: read - - strategy: - matrix: - include: - - language: python - build-mode: none - - language: java-kotlin - build-mode: none - steps: - - name: Checkout repository - uses: actions/checkout@6ccd57f4c5d15bdc2fef309bd9fb6cc9db2ef1c6 - - name: Initialize CodeQL - uses: github/codeql-action/init@4b1d7da102ff94aca014c0245062b1a463356d72 - with: - languages: ${{ matrix.language }} - build-mode: ${{ matrix.build-mode }} - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@4b1d7da102ff94aca014c0245062b1a463356d72 - with: - category: "/language:${{matrix.language}}" diff --git a/.github/workflows/security-monitoring.yml b/.github/workflows/security-monitoring.yml deleted file mode 100644 index ecce0643e6..0000000000 --- a/.github/workflows/security-monitoring.yml +++ /dev/null @@ -1,121 +0,0 @@ -name: Security Monitoring - -on: - schedule: - - cron: '0 16 * * *' - -concurrency: - group: ${{ github.workflow }}-${{ github.run_id }} - cancel-in-progress: true - -permissions: - id-token: write - -jobs: - check-code-scanning-alerts: - runs-on: ubuntu-latest - outputs: - code_scanning_alert_status: ${{ steps.check-code-scanning-alerts.outputs.code_scanning_alert_status }} - steps: - - name: Check for security alerts - id: check-code-scanning-alerts - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea - with: - github-token: ${{ secrets.GH_PAT }} - script: | - async function checkAlerts() { - const owner = '${{ github.repository_owner }}'; - const repo = '${{ github.event.repository.name }}'; - const ref = 'refs/heads/master'; - - const codeScanningAlerts = await github.rest.codeScanning.listAlertsForRepo({ - owner, - repo, - ref: ref - }); - const activeCodeScanningAlerts = codeScanningAlerts.data.filter(alert => alert.state === 'open'); - core.setOutput('code_scanning_alert_status', activeCodeScanningAlerts.length > 0 ? '1': '0'); - } - await checkAlerts(); - - check-dependabot-alerts: - runs-on: ubuntu-latest - outputs: - dependabot_alert_status: ${{ steps.check-dependabot-alerts.outputs.dependabot_alert_status }} - steps: - - name: Check for dependabot alerts - id: check-dependabot-alerts - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea - with: - github-token: ${{ secrets.GH_PAT }} - script: | - async function checkAlerts() { - const owner = '${{ github.repository_owner }}'; - const repo = '${{ github.event.repository.name }}'; - - const dependabotAlerts = await github.rest.dependabot.listAlertsForRepo({ - owner, - repo, - headers: { - 'accept': 'applications/vnd.github+json' - } - }); - const activeDependabotAlerts = dependabotAlerts.data.filter(alert => alert.state === 'open'); - core.setOutput('dependabot_alert_status', activeDependabotAlerts.length > 0 ? '1': '0'); - } - await checkAlerts(); - - check-secret-scanning-alerts: - runs-on: ubuntu-latest - outputs: - secret_scanning_alert_status: ${{ steps.check-secret-scanning-alerts.outputs.secret_scanning_alert_status }} - steps: - - name: Check for secret scanning alerts - id: check-secret-scanning-alerts - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea - with: - github-token: ${{ secrets.GH_PAT }} - script: | - async function checkAlerts() { - const owner = '${{ github.repository_owner }}'; - const repo = '${{ github.event.repository.name }}'; - - const secretScanningAlerts = await github.rest.secretScanning.listAlertsForRepo({ - owner, - repo, - }); - const activeSecretScanningAlerts = secretScanningAlerts.data.filter(alert => alert.state === 'open'); - core.setOutput('secret_scanning_alert_status', activeSecretScanningAlerts.length > 0 ? '1': '0'); - } - await checkAlerts(); - - put-metric-data: - runs-on: ubuntu-latest - needs: [check-code-scanning-alerts, check-dependabot-alerts, check-secret-scanning-alerts] - steps: - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@12e3392609eaaceb7ae6191b3f54bbcb85b5002b - with: - role-to-assume: ${{ secrets.MONITORING_ROLE_ARN }} - aws-region: us-west-2 - - name: Put Code Scanning Alert Metric Data - run: | - if [ "${{ needs.check-code-scanning-alerts.outputs.code_scanning_alert_status }}" == "1" ]; then - aws cloudwatch put-metric-data --metric-name CodeScanningAlert --namespace SecurityMonitoringMetrics --value 1 --unit Count --dimensions ProjectName=sagemaker-python-sdk - else - aws cloudwatch put-metric-data --metric-name CodeScanningAlert --namespace SecurityMonitoringMetrics --value 0 --unit Count --dimensions ProjectName=sagemaker-python-sdk - fi - - name: Put Dependabot Alert Metric Data - run: | - if [ "${{ needs.check-dependabot-alerts.outputs.dependabot_alert_status }}" == "1" ]; then - aws cloudwatch put-metric-data --metric-name DependabotAlert --namespace SecurityMonitoringMetrics --value 1 --unit Count --dimensions ProjectName=sagemaker-python-sdk - else - aws cloudwatch put-metric-data --metric-name DependabotAlert --namespace SecurityMonitoringMetrics --value 0 --unit Count --dimensions ProjectName=sagemaker-python-sdk - fi - - name: Put Secret Scanning Alert Metric Data - run: | - if [ "${{ needs.check-secret-scanning-alerts.outputs.secret_scanning_alert_status }}" == "1" ]; then - aws cloudwatch put-metric-data --metric-name SecretScanningAlert --namespace SecurityMonitoringMetrics --value 1 --unit Count --dimensions ProjectName=sagemaker-python-sdk - else - aws cloudwatch put-metric-data --metric-name SecretScanningAlert --namespace SecurityMonitoringMetrics --value 0 --unit Count --dimensions ProjectName=sagemaker-python-sdk - fi From 6235d7e4c5d9ec90ec48669e19dc950f7d555d93 Mon Sep 17 00:00:00 2001 From: jzhaoqwa <52220743+zhaoqizqwang@users.noreply.github.com> Date: Tue, 9 Dec 2025 17:40:13 -0800 Subject: [PATCH 2/7] reformat files --- src/sagemaker/__init__.py | 2 +- .../modules/train/sm_recipes/test_utils.py | 18 ++--- .../modules/train/test_model_trainer.py | 14 ++-- tests/unit/test_pytorch.py | 80 +++++++++++-------- 4 files changed, 63 insertions(+), 51 deletions(-) diff --git a/src/sagemaker/__init__.py b/src/sagemaker/__init__.py index 0167b7139a..6a3f43f66c 100644 --- a/src/sagemaker/__init__.py +++ b/src/sagemaker/__init__.py @@ -63,7 +63,7 @@ from sagemaker.automl.automl import AutoML, AutoMLJob, AutoMLInput # noqa: F401 from sagemaker.automl.candidate_estimator import CandidateEstimator, CandidateStep # noqa: F401 from sagemaker.automl.automlv2 import ( # noqa: F401 - AutoMLV2, + AutoMLV2, AutoMLJobV2, LocalAutoMLDataChannel, AutoMLDataChannel, diff --git a/tests/unit/sagemaker/modules/train/sm_recipes/test_utils.py b/tests/unit/sagemaker/modules/train/sm_recipes/test_utils.py index db0fb1c279..18ef583d3c 100644 --- a/tests/unit/sagemaker/modules/train/sm_recipes/test_utils.py +++ b/tests/unit/sagemaker/modules/train/sm_recipes/test_utils.py @@ -110,24 +110,24 @@ def test_load_base_recipe_types( if recipe_type == "sagemaker": # Mock the file check to simulate recipe exists - with patch("os.path.isfile", return_value=True), \ - patch("shutil.copy") as mock_copy: + with patch("os.path.isfile", return_value=True), patch("shutil.copy") as mock_copy: # Create a temporary recipe file for the copy operation import tempfile import os - with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f: + + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: yaml.dump({"trainer": {"num_nodes": 1}}, f) temp_path = f.name - + def copy_side_effect(src, dst): # Read from temp file and write to destination - with open(temp_path, 'r') as src_file: + with open(temp_path, "r") as src_file: content = src_file.read() - with open(dst, 'w') as dst_file: + with open(dst, "w") as dst_file: dst_file.write(content) - + mock_copy.side_effect = copy_side_effect - + load_recipe = _load_base_recipe( training_recipe="training/llama/p4_hf_llama3_70b_seq8k_gpu", recipe_overrides=None, @@ -136,7 +136,7 @@ def copy_side_effect(src, dst): assert load_recipe is not None assert "trainer" in load_recipe assert mock_clone.call_args.args[0] == training_recipes_cfg.get("launcher_repo") - + # Clean up os.unlink(temp_path) diff --git a/tests/unit/sagemaker/modules/train/test_model_trainer.py b/tests/unit/sagemaker/modules/train/test_model_trainer.py index 6431f96502..367ab2a84e 100644 --- a/tests/unit/sagemaker/modules/train/test_model_trainer.py +++ b/tests/unit/sagemaker/modules/train/test_model_trainer.py @@ -934,13 +934,15 @@ def mock_upload_data(path, bucket, key_prefix): @patch("sagemaker.modules.train.model_trainer._load_base_recipe") def test_model_trainer_gpu_recipe_full_init(mock_load_recipe, modules_session): from omegaconf import OmegaConf - + # Mock the recipe loading to return a valid GPU recipe structure - mock_load_recipe.return_value = OmegaConf.create({ - "trainer": {"num_nodes": 2}, - "model": {"model_type": "llama_v3"}, - }) - + mock_load_recipe.return_value = OmegaConf.create( + { + "trainer": {"num_nodes": 2}, + "model": {"model_type": "llama_v3"}, + } + ) + training_recipe = "training/llama/p4_hf_llama3_70b_seq8k_gpu" recipe_overrides = {"run": {"results_dir": "/opt/ml/model"}} compute = Compute(instance_type="ml.p4d.24xlarge", instance_count="2") diff --git a/tests/unit/test_pytorch.py b/tests/unit/test_pytorch.py index 2671cdbcd6..f5239a0dd6 100644 --- a/tests/unit/test_pytorch.py +++ b/tests/unit/test_pytorch.py @@ -900,23 +900,27 @@ def test_training_recipe_for_cpu(sagemaker_session): ) @patch("sagemaker.pytorch.estimator.PyTorch._recipe_load") @patch("sagemaker.pytorch.estimator._get_training_recipe_gpu_script") -def test_training_recipe_for_gpu(mock_gpu_script, mock_recipe_load, sagemaker_session, recipe, model): +def test_training_recipe_for_gpu( + mock_gpu_script, mock_recipe_load, sagemaker_session, recipe, model +): from omegaconf import OmegaConf - + # Mock the GPU script function to return the expected entry point mock_gpu_script.return_value = f"{model}_pretrain.py" - + # Mock the recipe structure that would be loaded - mock_recipe = OmegaConf.create({ - "trainer": { - "num_nodes": 1, - }, - "model": { - "model_type": model, - }, - }) + mock_recipe = OmegaConf.create( + { + "trainer": { + "num_nodes": 1, + }, + "model": { + "model_type": model, + }, + } + ) mock_recipe_load.return_value = (recipe, mock_recipe) - + container_log_level = '"logging.INFO"' recipe_overrides = { @@ -968,21 +972,23 @@ def test_training_recipe_for_gpu(mock_gpu_script, mock_recipe_load, sagemaker_se @patch("sagemaker.pytorch.estimator._get_training_recipe_gpu_script") def test_training_recipe_with_override(mock_gpu_script, mock_recipe_load, sagemaker_session): from omegaconf import OmegaConf - + # Mock the GPU script function to return the expected entry point mock_gpu_script.return_value = "mistral_pretrain.py" - + # Mock the recipe structure that would be loaded - mock_recipe = OmegaConf.create({ - "trainer": { - "num_nodes": 1, - }, - "model": { - "model_type": "mistral", - }, - }) + mock_recipe = OmegaConf.create( + { + "trainer": { + "num_nodes": 1, + }, + "model": { + "model_type": "mistral", + }, + } + ) mock_recipe_load.return_value = ("hf_llama3_8b_seq8k_gpu_p5x16_pretrain", mock_recipe) - + container_log_level = '"logging.INFO"' recipe_overrides = { @@ -1021,23 +1027,27 @@ def test_training_recipe_with_override(mock_gpu_script, mock_recipe_load, sagema @patch("sagemaker.pytorch.estimator.PyTorch._recipe_load") @patch("sagemaker.pytorch.estimator._get_training_recipe_gpu_script") -def test_training_recipe_gpu_custom_source_dir(mock_gpu_script, mock_recipe_load, sagemaker_session): +def test_training_recipe_gpu_custom_source_dir( + mock_gpu_script, mock_recipe_load, sagemaker_session +): from omegaconf import OmegaConf - + # Mock the GPU script function to return the expected entry point mock_gpu_script.return_value = "mistral_pretrain.py" - + # Mock the recipe structure that would be loaded - mock_recipe = OmegaConf.create({ - "trainer": { - "num_nodes": 1, - }, - "model": { - "model_type": "mistral", - }, - }) + mock_recipe = OmegaConf.create( + { + "trainer": { + "num_nodes": 1, + }, + "model": { + "model_type": "mistral", + }, + } + ) mock_recipe_load.return_value = ("hf_llama3_8b_seq8k_gpu_p5x16_pretrain", mock_recipe) - + container_log_level = '"logging.INFO"' recipe_overrides = { From dd8ac20e111bd06aa50bc293cede0c62484a8ffa Mon Sep 17 00:00:00 2001 From: jzhaoqwa <52220743+zhaoqizqwang@users.noreply.github.com> Date: Tue, 9 Dec 2025 18:09:38 -0800 Subject: [PATCH 3/7] Fixing codestyle eror --- tests/unit/sagemaker/modules/train/sm_recipes/test_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/sagemaker/modules/train/sm_recipes/test_utils.py b/tests/unit/sagemaker/modules/train/sm_recipes/test_utils.py index 18ef583d3c..f92751b83b 100644 --- a/tests/unit/sagemaker/modules/train/sm_recipes/test_utils.py +++ b/tests/unit/sagemaker/modules/train/sm_recipes/test_utils.py @@ -33,7 +33,6 @@ _get_args_from_nova_recipe, _get_args_from_llmft_recipe, ) -from sagemaker.modules.utils import _run_clone_command_silent from sagemaker.modules.configs import Compute From 533842fcca225c02d3c19015ffb41e514cb9db78 Mon Sep 17 00:00:00 2001 From: jzhaoqwa <52220743+zhaoqizqwang@users.noreply.github.com> Date: Tue, 9 Dec 2025 18:29:36 -0800 Subject: [PATCH 4/7] fix codestyle --- requirements/extras/test_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/extras/test_requirements.txt b/requirements/extras/test_requirements.txt index ecc805fc47..477103ab1d 100644 --- a/requirements/extras/test_requirements.txt +++ b/requirements/extras/test_requirements.txt @@ -62,4 +62,4 @@ mypy-boto3-s3==1.35.76 mypy-extensions==1.0.0 mypy==1.9.0 # apache-airflow transitive dependancy -google-re2<1.1.20250805; python_version < "3.10" \ No newline at end of file +google-re2<1.1.20250805; python_version < "3.10" From 8b62909d21e469a151e0648f91a77446c0be0e69 Mon Sep 17 00:00:00 2001 From: jzhaoqwa <52220743+zhaoqizqwang@users.noreply.github.com> Date: Wed, 10 Dec 2025 04:49:55 -0800 Subject: [PATCH 5/7] Update hub cleanup --- tests/integ/sagemaker/jumpstart/conftest.py | 45 ++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/tests/integ/sagemaker/jumpstart/conftest.py b/tests/integ/sagemaker/jumpstart/conftest.py index 260b0f2b22..e6df9bddd3 100644 --- a/tests/integ/sagemaker/jumpstart/conftest.py +++ b/tests/integ/sagemaker/jumpstart/conftest.py @@ -52,7 +52,25 @@ def _setup(): hub = Hub( hub_name=os.environ[ENV_VAR_JUMPSTART_SDK_TEST_HUB_NAME], sagemaker_session=get_sm_session() ) - hub.create(description=test_hub_description) + + # Check if hub already exists before creating + try: + hub.describe() + print(f"Hub {test_hub_name} already exists, reusing it.") + except Exception: + # Hub doesn't exist, create it + try: + hub.create(description=test_hub_description) + print(f"Created new hub: {test_hub_name}") + except Exception as e: + if "ResourceLimitExceeded" in str(e): + print(f"Hub limit reached. Cleaning up old hubs...") + _cleanup_old_hubs(get_sm_session()) + # Retry creating the hub + hub.create(description=test_hub_description) + print(f"Created new hub after cleanup: {test_hub_name}") + else: + raise def _teardown(): @@ -138,6 +156,31 @@ def _teardown(): _delete_hubs(sagemaker_session, test_hub_name) +def _cleanup_old_hubs(sagemaker_session): + """Clean up old test hubs to free up resources.""" + try: + response = sagemaker_session.list_hubs() + test_hubs = [ + hub for hub in response.get("HubSummaries", []) + if hub["HubName"].startswith(HUB_NAME_PREFIX) + ] + + # Sort by creation time and delete oldest hubs + test_hubs.sort(key=lambda x: x.get("CreationTime", "")) + + # Delete oldest hubs (keep only the most recent 10) + hubs_to_delete = test_hubs[:-10] if len(test_hubs) > 10 else test_hubs[:max(0, len(test_hubs) - 40)] + + for hub in hubs_to_delete: + try: + print(f"Deleting old hub: {hub['HubName']}") + _delete_hubs(sagemaker_session, hub["HubName"]) + except Exception as e: + print(f"Failed to delete hub {hub['HubName']}: {e}") + except Exception as e: + print(f"Failed to cleanup old hubs: {e}") + + def _delete_hubs(sagemaker_session, hub_name): # list and delete all hub contents first list_hub_content_response = sagemaker_session.list_hub_contents( From 061f07dcd3114b932c598b976b720c393fe78a6a Mon Sep 17 00:00:00 2001 From: jzhaoqwa <52220743+zhaoqizqwang@users.noreply.github.com> Date: Wed, 10 Dec 2025 04:57:47 -0800 Subject: [PATCH 6/7] reformat file --- tests/integ/sagemaker/jumpstart/conftest.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/integ/sagemaker/jumpstart/conftest.py b/tests/integ/sagemaker/jumpstart/conftest.py index e6df9bddd3..711c84003f 100644 --- a/tests/integ/sagemaker/jumpstart/conftest.py +++ b/tests/integ/sagemaker/jumpstart/conftest.py @@ -52,7 +52,7 @@ def _setup(): hub = Hub( hub_name=os.environ[ENV_VAR_JUMPSTART_SDK_TEST_HUB_NAME], sagemaker_session=get_sm_session() ) - + # Check if hub already exists before creating try: hub.describe() @@ -161,16 +161,19 @@ def _cleanup_old_hubs(sagemaker_session): try: response = sagemaker_session.list_hubs() test_hubs = [ - hub for hub in response.get("HubSummaries", []) + hub + for hub in response.get("HubSummaries", []) if hub["HubName"].startswith(HUB_NAME_PREFIX) ] - + # Sort by creation time and delete oldest hubs test_hubs.sort(key=lambda x: x.get("CreationTime", "")) - + # Delete oldest hubs (keep only the most recent 10) - hubs_to_delete = test_hubs[:-10] if len(test_hubs) > 10 else test_hubs[:max(0, len(test_hubs) - 40)] - + hubs_to_delete = ( + test_hubs[:-10] if len(test_hubs) > 10 else test_hubs[: max(0, len(test_hubs) - 40)] + ) + for hub in hubs_to_delete: try: print(f"Deleting old hub: {hub['HubName']}") From 78065ffe6fb26ef6be499e1c27f9173894840de5 Mon Sep 17 00:00:00 2001 From: jzhaoqwa <52220743+zhaoqizqwang@users.noreply.github.com> Date: Wed, 10 Dec 2025 05:04:50 -0800 Subject: [PATCH 7/7] fix codestyle --- tests/integ/sagemaker/jumpstart/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integ/sagemaker/jumpstart/conftest.py b/tests/integ/sagemaker/jumpstart/conftest.py index 711c84003f..51b0fbf703 100644 --- a/tests/integ/sagemaker/jumpstart/conftest.py +++ b/tests/integ/sagemaker/jumpstart/conftest.py @@ -64,7 +64,7 @@ def _setup(): print(f"Created new hub: {test_hub_name}") except Exception as e: if "ResourceLimitExceeded" in str(e): - print(f"Hub limit reached. Cleaning up old hubs...") + print("Hub limit reached. Cleaning up old hubs...") _cleanup_old_hubs(get_sm_session()) # Retry creating the hub hub.create(description=test_hub_description)