diff --git a/sagemaker-core/example_notebooks/get_started.ipynb b/sagemaker-core/example_notebooks/get_started.ipynb index ee1a29e412..e3d5b9daf6 100644 --- a/sagemaker-core/example_notebooks/get_started.ipynb +++ b/sagemaker-core/example_notebooks/get_started.ipynb @@ -187,7 +187,13 @@ "metadata": {}, "outputs": [], "source": [ - "image = '141502667606.dkr.ecr.eu-west-1.amazonaws.com/sagemaker-xgboost:1.7-1'" + "from sagemaker.core import image_uris\n", + "\n", + "image = image_uris.retrieve(\n", + " framework=\"xgboost\",\n", + " region=region,\n", + " version='1.7-1'\n", + ")" ] }, { @@ -687,7 +693,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "temp_env", "language": "python", "name": "python3" }, diff --git a/sagemaker-core/example_notebooks/inference_and_resource_chaining.ipynb b/sagemaker-core/example_notebooks/inference_and_resource_chaining.ipynb index 0a410056a5..06c03d8765 100644 --- a/sagemaker-core/example_notebooks/inference_and_resource_chaining.ipynb +++ b/sagemaker-core/example_notebooks/inference_and_resource_chaining.ipynb @@ -241,11 +241,13 @@ "metadata": {}, "outputs": [], "source": [ - "# Image name is hardcoded here\n", - "# Image name can be programatically got by using sagemaker package and calling image_uris.retrieve\n", - "# Since that is a high level abstraction that has multiple dependencies, the image URIs functionalities will live in sagemaker (V2)\n", + "from sagemaker.core import image_uris\n", "\n", - "image = \"433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest\"" + "image = image_uris.retrieve(\n", + " framework=\"xgboost\",\n", + " region=region,\n", + " version='latest'\n", + ")" ] }, { diff --git a/sagemaker-core/example_notebooks/intelligent_defaults_and_logging.ipynb b/sagemaker-core/example_notebooks/intelligent_defaults_and_logging.ipynb index ff3bdeda2f..aabdaba7d8 100644 --- a/sagemaker-core/example_notebooks/intelligent_defaults_and_logging.ipynb +++ 
b/sagemaker-core/example_notebooks/intelligent_defaults_and_logging.ipynb @@ -269,11 +269,13 @@ "metadata": {}, "outputs": [], "source": [ - "# Image name is hardcoded here\n", - "# Image name can be programatically got by using sagemaker package and calling image_uris.retrieve\n", - "# Since that is a high level abstraction that has multiple dependencies, the image URIs functionalities will live in sagemaker (V2)\n", + "from sagemaker.core import image_uris\n", "\n", - "image = \"433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest\"" + "image = image_uris.retrieve(\n", + " framework=\"xgboost\",\n", + " region=region,\n", + " version='latest'\n", + ")" ] }, { diff --git a/sagemaker-core/example_notebooks/sagemaker_core_overview.ipynb b/sagemaker-core/example_notebooks/sagemaker_core_overview.ipynb index 819a65c3ec..f68658ea4c 100644 --- a/sagemaker-core/example_notebooks/sagemaker_core_overview.ipynb +++ b/sagemaker-core/example_notebooks/sagemaker_core_overview.ipynb @@ -255,11 +255,13 @@ "metadata": {}, "outputs": [], "source": [ - "# Image name is hardcoded here\n", - "# Image name can be programatically got by using sagemaker package and calling image_uris.retrieve\n", - "# Since that is a high level abstraction that has multiple dependencies, the image URIs functionalities will live in sagemaker (V2)\n", + "from sagemaker.core import image_uris\n", "\n", - "image = \"433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest\"" + "image = image_uris.retrieve(\n", + " framework=\"xgboost\",\n", + " region=region,\n", + " version='latest'\n", + ")" ] }, { diff --git a/sagemaker-train/example_notebooks/evaluate/benchmark_demo.ipynb b/sagemaker-train/example_notebooks/evaluate/benchmark_demo.ipynb index 5cb75f506c..e0133a9272 100644 --- a/sagemaker-train/example_notebooks/evaluate/benchmark_demo.ipynb +++ b/sagemaker-train/example_notebooks/evaluate/benchmark_demo.ipynb @@ -21,74 +21,9 @@ }, { "cell_type": "code", - "execution_count": 2, + 
"execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[\n", - "│ <_Benchmark.MMLU: 'mmlu'>,\n", - "│ <_Benchmark.MMLU_PRO: 'mmlu_pro'>,\n", - "│ <_Benchmark.BBH: 'bbh'>,\n", - "│ <_Benchmark.GPQA: 'gpqa'>,\n", - "│ <_Benchmark.MATH: 'math'>,\n", - "│ <_Benchmark.STRONG_REJECT: 'strong_reject'>,\n", - "│ <_Benchmark.IFEVAL: 'ifeval'>,\n", - "│ <_Benchmark.GEN_QA: 'gen_qa'>,\n", - "│ <_Benchmark.MMMU: 'mmmu'>,\n", - "│ <_Benchmark.LLM_JUDGE: 'llm_judge'>,\n", - "│ <_Benchmark.INFERENCE_ONLY: 'inference_only'>\n", - "]\n", - "\n" - ], - "text/plain": [ - "\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225m_Benchmark.MMLU:\u001b[0m\u001b[39m \u001b[0m\u001b[38;2;0;135;0m'mmlu'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.MMLU_PRO: \u001b[0m\u001b[38;2;0;135;0m'mmlu_pro'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.BBH: \u001b[0m\u001b[38;2;0;135;0m'bbh'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.GPQA: \u001b[0m\u001b[38;2;0;135;0m'gpqa'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.MATH: \u001b[0m\u001b[38;2;0;135;0m'math'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.STRONG_REJECT: \u001b[0m\u001b[38;2;0;135;0m'strong_reject'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.IFEVAL: \u001b[0m\u001b[38;2;0;135;0m'ifeval'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.GEN_QA: \u001b[0m\u001b[38;2;0;135;0m'gen_qa'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.MMMU: \u001b[0m\u001b[38;2;0;135;0m'mmmu'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.LLM_JUDGE: \u001b[0m\u001b[38;2;0;135;0m'llm_judge'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.INFERENCE_ONLY: 
\u001b[0m\u001b[38;2;0;135;0m'inference_only'\u001b[0m\u001b[1m>\u001b[0m\n", - "\u001b[1m]\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
{\n", - "│ 'modality': 'Multi-Modal (image)',\n", - "│ 'description': 'Custom Dataset Evaluation – Lets you supply your own dataset for benchmarking, comparing model outputs to reference answers with metrics such as ROUGE and BLEU. gen_qa supports image inference for models which have multimodal support.',\n", - "│ 'metrics': ['all'],\n", - "│ 'strategy': 'gen_qa',\n", - "│ 'subtask_available': False,\n", - "│ 'subtasks': None\n", - "}\n", - "\n" - ], - "text/plain": [ - "\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'modality'\u001b[0m: \u001b[38;2;0;135;0m'Multi-Modal \u001b[0m\u001b[1;38;2;0;135;0m(\u001b[0m\u001b[38;2;0;135;0mimage\u001b[0m\u001b[1;38;2;0;135;0m)\u001b[0m\u001b[38;2;0;135;0m'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'description'\u001b[0m: \u001b[38;2;0;135;0m'Custom Dataset Evaluation – Lets you supply your own dataset for benchmarking, comparing model outputs to reference answers with metrics such as ROUGE and BLEU. gen_qa supports image inference for models which have multimodal support.'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'metrics'\u001b[0m: \u001b[1m[\u001b[0m\u001b[38;2;0;135;0m'all'\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'strategy'\u001b[0m: \u001b[38;2;0;135;0m'gen_qa'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'subtask_available'\u001b[0m: \u001b[3;38;2;215;0;0mFalse\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'subtasks'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[1m}\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "from sagemaker.train.evaluate import get_benchmarks, get_benchmark_properties\n", "from rich.pretty import pprint\n", @@ -134,108 +69,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:39:45] INFO Found credentials in shared credentials file: ~/.aws/credentials credentials.py:1364\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:39:45]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=314173;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=126855;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/botocore/credentials.py#1364\u001b\\\u001b[2m1364\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /Users/mufi/Library/Application Support/sagemaker/config.yaml\n" - ] - }, - { - "data": { - "text/html": [ - "
INFO Resolved MLflow resource ARN: base_evaluator.py:113\n", - " arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/ \n", - " mmlu-eval-experiment \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved MLflow resource ARN: \u001b]8;id=480390;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=329695;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#113\u001b\\\u001b[2m113\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/ \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m mmlu-eval-experiment \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Model package group provided as ARN: base_evaluator.py:145\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package-group/exa \n", - " mple-name-aovqo \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Model package group provided as ARN: \u001b]8;id=572070;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=299487;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#145\u001b\\\u001b[2m145\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-group/exa \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m mple-name-aovqo \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
BenchMarkEvaluator(\n", - "│ region=None,\n", - "│ sagemaker_session=<sagemaker.core.helper.session_helper.Session object at 0x13cd28e60>,\n", - "│ model='arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28',\n", - "│ base_eval_name='gen-qa-eval-demo',\n", - "│ s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n", - "│ mlflow_resource_arn='arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment',\n", - "│ mlflow_experiment_name=None,\n", - "│ mlflow_run_name=None,\n", - "│ networking=None,\n", - "│ kms_key_id=None,\n", - "│ model_package_group='arn:aws:sagemaker:us-west-2:052150106756:model-package-group/example-name-aovqo',\n", - "│ benchmark=<_Benchmark.GEN_QA: 'gen_qa'>,\n", - "│ subtasks=None,\n", - "│ dataset='s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl',\n", - "│ evaluate_base_model=True\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mBenchMarkEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mregion\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msagemaker_session\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225msagemaker.core.helper.session_helper.Session\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x13cd28e60\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbase_eval_name\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'gen-qa-eval-demo'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ 
\u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_resource_arn\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_experiment_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_run_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mnetworking\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mkms_key_id\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel_package_group\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/example-name-aovqo'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbenchmark\u001b[0m\u001b[39m=<_Benchmark.GEN_QA: \u001b[0m\u001b[38;2;0;135;0m'gen_qa'\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msubtasks\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mdataset\u001b[0m=\u001b[38;2;0;135;0m's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mevaluate_base_model\u001b[0m=\u001b[3;38;2;0;135;0mTrue\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "from 
sagemaker.train.evaluate import BenchMarkEvaluator\n", "\n", @@ -260,43 +94,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n", - "│ in <module>:13 │\n", - "│ │\n", - "│ 10 # Create evaluator with GEN_QA benchmark │\n", - "│ 11 # These values match our successfully tested configuration │\n", - "│ 12 evaluator = BenchMarkEvaluator( │\n", - "│ ❱ 13 │ benchmark=Benchmark.GEN_QA, │\n", - "│ 14 │ model=\"meta-textgeneration-llama-3-2-1b-instruct\", │\n", - "│ 15 │ s3_output_path=\"s3://mufi-test-serverless-smtj/eval/\", │\n", - "│ 16 │ mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server │\n", - "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n", - "NameError: name 'Benchmark' is not defined\n", - "\n" - ], - "text/plain": [ - "\u001b[38;2;255;0;0m╭─\u001b[0m\u001b[38;2;255;0;0m──────────────────────────────\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[1;38;2;255;0;0mTraceback \u001b[0m\u001b[1;2;38;2;255;0;0m(most recent call last)\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[38;2;255;0;0m───────────────────────────────\u001b[0m\u001b[38;2;255;0;0m─╮\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m in
BenchMarkEvaluator(\n", - "│ region='us-east-1',\n", - "│ sagemaker_session=<sagemaker_core.helper.session_helper.Session object at 0x356a03950>,\n", - "│ model='arn:aws:sagemaker:us-east-1:052150106756:model-package/test-nova-finetuned-models/3',\n", - "│ base_eval_name='gen-qa-eval-demo',\n", - "│ s3_output_path='s3://mufi-test-serverless-iad/eval/',\n", - "│ mlflow_resource_arn='arn:aws:sagemaker:us-east-1:052150106756:mlflow-tracking-server/mlflow-prod-server',\n", - "│ mlflow_experiment_name=None,\n", - "│ mlflow_run_name=None,\n", - "│ networking=None,\n", - "│ kms_key_id=None,\n", - "│ model_package_group='arn:aws:sagemaker:us-east-1:052150106756:model-package-group/test-nova-finetuned-models',\n", - "│ benchmark=<_Benchmark.GEN_QA: 'gen_qa'>,\n", - "│ subtasks=None,\n", - "│ dataset='s3://sagemaker-us-east-1-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl',\n", - "│ evaluate_base_model=True\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mBenchMarkEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mregion\u001b[0m=\u001b[38;2;0;135;0m'us-east-1'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msagemaker_session\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225msagemaker_core.helper.session_helper.Session\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x356a03950\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-east-1:052150106756:model-package/test-nova-finetuned-models/3'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbase_eval_name\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'gen-qa-eval-demo'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ 
\u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m's3://mufi-test-serverless-iad/eval/'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_resource_arn\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-east-1:052150106756:mlflow-tracking-server/mlflow-prod-server'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_experiment_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_run_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mnetworking\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mkms_key_id\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel_package_group\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-east-1:052150106756:model-package-group/test-nova-finetuned-models'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbenchmark\u001b[0m\u001b[39m=<_Benchmark.GEN_QA: \u001b[0m\u001b[38;2;0;135;0m'gen_qa'\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msubtasks\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mdataset\u001b[0m=\u001b[38;2;0;135;0m's3://sagemaker-us-east-1-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mevaluate_base_model\u001b[0m=\u001b[3;38;2;0;135;0mTrue\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# # 
[Optional] Nova testing IAD Prod\n", "\n", @@ -411,156 +155,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:26:31] INFO SageMaker Python SDK will collect telemetry to help us better telemetry_logging.py:91\n", - " understand our user's needs, diagnose issues, and deliver \n", - " additional features. \n", - " To opt out of telemetry, please disable via TelemetryOptOut \n", - " parameter in SDK defaults config. For more information, refer \n", - " to \n", - " https://sagemaker.readthedocs.io/en/stable/overview.html#confi \n", - " guring-and-using-defaults-with-the-sagemaker-python-sdk. \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:26:31]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=665742;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=28065;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Fetching evaluation override parameters for hyperparameters benchmark_evaluator.py:495\n", - " property \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Fetching evaluation override parameters for hyperparameters \u001b]8;id=668827;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py\u001b\\\u001b[2mbenchmark_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=344195;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py#495\u001b\\\u001b[2m495\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m property \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Fetching hub content metadata for recipe_utils.py:201\n", - " meta-textgeneration-llama-3-2-1b-instruct from SageMakerPublicHub \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Fetching hub content metadata for \u001b]8;id=912465;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=530916;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#201\u001b\\\u001b[2m201\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct from SageMakerPublicHub \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
WARNING No region provided. Using default region. utils.py:340\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m No region provided. Using default region. \u001b]8;id=483608;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=394176;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py#340\u001b\\\u001b[2m340\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Runs on sagemaker us-west-2, region:us-west-2 utils.py:354\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Runs on sagemaker us-west-\u001b[1;36m2\u001b[0m, region:us-west-\u001b[1;36m2\u001b[0m \u001b]8;id=127187;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=740445;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py#354\u001b\\\u001b[2m354\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Searching for evaluation recipe with Type='Evaluation' and recipe_utils.py:221\n", - " EvaluationType='DeterministicEvaluation' \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for evaluation recipe with \u001b[38;2;215;175;0mType\u001b[0m=\u001b[38;2;0;135;0m'Evaluation'\u001b[0m and \u001b]8;id=26417;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=309515;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#221\u001b\\\u001b[2m221\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;215;175;0mEvaluationType\u001b[0m=\u001b[38;2;0;135;0m'DeterministicEvaluation'\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Downloading override parameters from recipe_utils.py:249\n", - " s3://jumpstart-cache-beta-us-west-2/recipes/open-source-eval-meta- \n", - " textgeneration-llama-3-2-1b-instruct-deterministic_override_params \n", - " _sm_jobs_v1.0.19.json \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Downloading override parameters from \u001b]8;id=762738;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=1149;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#249\u001b\\\u001b[2m249\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/jumpstart-cache-beta-us-west-2/recipes/\u001b[0m\u001b[38;2;225;0;225mopen-source-eval-meta-\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mtextgeneration-llama-3-2-1b-instruct-deterministic_override_params\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225m_sm_jobs_v1.0.19.json\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
{\n", - "│ 'max_new_tokens': '8192',\n", - "│ 'temperature': '0',\n", - "│ 'top_k': '-1',\n", - "│ 'top_p': '1.0',\n", - "│ 'aggregation': '',\n", - "│ 'postprocessing': 'False',\n", - "│ 'max_model_len': '12000'\n", - "}\n", - "\n" - ], - "text/plain": [ - "\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b[38;2;0;135;0m'8192'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[38;2;0;135;0m'-1'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[38;2;0;135;0m'1.0'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'aggregation'\u001b[0m: \u001b[38;2;0;135;0m''\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'postprocessing'\u001b[0m: \u001b[38;2;0;135;0m'False'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'max_model_len'\u001b[0m: \u001b[38;2;0;135;0m'12000'\u001b[0m\n", - "\u001b[1m}\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "pprint(evaluator.hyperparameters.to_dict())\n", "\n", @@ -591,1114 +188,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:40:20] INFO SageMaker Python SDK will collect telemetry to help us better telemetry_logging.py:91\n", - " understand our user's needs, diagnose issues, and deliver \n", - " additional features. \n", - " To opt out of telemetry, please disable via TelemetryOptOut \n", - " parameter in SDK defaults config. For more information, refer \n", - " to \n", - " https://sagemaker.readthedocs.io/en/stable/overview.html#confi \n", - " guring-and-using-defaults-with-the-sagemaker-python-sdk. \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:40:20]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=39435;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=899931;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Getting or creating artifact for source: base_evaluator.py:597\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \n", - " tuned-models-gamma/28 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Getting or creating artifact for source: \u001b]8;id=774478;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=222956;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#597\u001b\\\u001b[2m597\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Searching for existing artifact for model package: base_evaluator.py:459\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \n", - " tuned-models-gamma/28 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for existing artifact for model package: \u001b]8;id=672788;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=533927;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#459\u001b\\\u001b[2m459\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Found existing artifact: base_evaluator.py:468\n", - " arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3 \n", - " 138877d772ec489bef \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found existing artifact: \u001b]8;id=555230;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=311641;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#468\u001b\\\u001b[2m468\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3 \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 138877d772ec489bef \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Using resolved model_package_group ARN: base_evaluator.py:414\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package-group/exa \n", - " mple-name-aovqo \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using resolved model_package_group ARN: \u001b]8;id=350625;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=393598;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#414\u001b\\\u001b[2m414\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-group/exa \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m mple-name-aovqo \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Using ModelPackage - model_package_group_arn: benchmark_evaluator.py:644\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package-grou \n", - " p/example-name-aovqo \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using ModelPackage - model_package_group_arn: \u001b]8;id=534430;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py\u001b\\\u001b[2mbenchmark_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=895229;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py#644\u001b\\\u001b[2m644\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-grou \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m p/example-name-aovqo \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Resolved model info - base_model_name: benchmark_evaluator.py:647\n", - " meta-textgeneration-llama-3-2-1b-instruct, base_model_arn: \n", - " arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublic \n", - " Hub/Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0, \n", - " source_model_package_arn: \n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package/test \n", - " -finetuned-models-gamma/28 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved model info - base_model_name: \u001b]8;id=1084;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py\u001b\\\u001b[2mbenchmark_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=849460;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py#647\u001b\\\u001b[2m647\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct, base_model_arn: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublic \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m Hub/Model/meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct/\u001b[1;36m1.10\u001b[0m.\u001b[1;36m0\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m source_model_package_arn: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -finetuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO SageMaker Python SDK will collect telemetry to help us better telemetry_logging.py:91\n", - " understand our user's needs, diagnose issues, and deliver \n", - " additional features. \n", - " To opt out of telemetry, please disable via TelemetryOptOut \n", - " parameter in SDK defaults config. For more information, refer \n", - " to \n", - " https://sagemaker.readthedocs.io/en/stable/overview.html#confi \n", - " guring-and-using-defaults-with-the-sagemaker-python-sdk. \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=537782;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=387290;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Fetching evaluation override parameters for hyperparameters benchmark_evaluator.py:495\n", - " property \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Fetching evaluation override parameters for hyperparameters \u001b]8;id=706064;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py\u001b\\\u001b[2mbenchmark_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=284205;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py#495\u001b\\\u001b[2m495\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m property \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Fetching hub content metadata for recipe_utils.py:201\n", - " meta-textgeneration-llama-3-2-1b-instruct from SageMakerPublicHub \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Fetching hub content metadata for \u001b]8;id=502448;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=531984;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#201\u001b\\\u001b[2m201\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct from SageMakerPublicHub \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Searching for evaluation recipe with Type='Evaluation' and recipe_utils.py:221\n", - " EvaluationType='DeterministicEvaluation' \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for evaluation recipe with \u001b[38;2;215;175;0mType\u001b[0m=\u001b[38;2;0;135;0m'Evaluation'\u001b[0m and \u001b]8;id=67072;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=119115;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#221\u001b\\\u001b[2m221\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;215;175;0mEvaluationType\u001b[0m=\u001b[38;2;0;135;0m'DeterministicEvaluation'\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Downloading override parameters from recipe_utils.py:249\n", - " s3://jumpstart-cache-beta-us-west-2/recipes/open-source-eval-meta- \n", - " textgeneration-llama-3-2-1b-instruct-deterministic_override_params \n", - " _sm_jobs_v1.0.19.json \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Downloading override parameters from \u001b]8;id=954396;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=959350;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#249\u001b\\\u001b[2m249\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/jumpstart-cache-beta-us-west-2/recipes/\u001b[0m\u001b[38;2;225;0;225mopen-source-eval-meta-\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mtextgeneration-llama-3-2-1b-instruct-deterministic_override_params\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225m_sm_jobs_v1.0.19.json\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 13:40:21] INFO Using configured hyperparameters: {'max_new_tokens': benchmark_evaluator.py:568\n", - " '8192', 'temperature': '0', 'top_k': '-1', 'top_p': '1.0', \n", - " 'aggregation': '', 'postprocessing': 'False', \n", - " 'max_model_len': '12000'} \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:40:21]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using configured hyperparameters: \u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b]8;id=584498;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py\u001b\\\u001b[2mbenchmark_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=126531;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py#568\u001b\\\u001b[2m568\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'8192'\u001b[0m, \u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m, \u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[38;2;0;135;0m'-1'\u001b[0m, \u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[38;2;0;135;0m'1.0'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'aggregation'\u001b[0m: \u001b[38;2;0;135;0m''\u001b[0m, \u001b[38;2;0;135;0m'postprocessing'\u001b[0m: \u001b[38;2;0;135;0m'False'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'max_model_len'\u001b[0m: \u001b[38;2;0;135;0m'12000'\u001b[0m\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Using full template for ModelPackage base_evaluator.py:655\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using full template for ModelPackage \u001b]8;id=556396;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=773270;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#655\u001b\\\u001b[2m655\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Resolved template parameters: {'role_arn': base_evaluator.py:693\n", - " 'arn:aws:iam::052150106756:role/Admin', 'mlflow_resource_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server \n", - " /mmlu-eval-experiment', 'mlflow_experiment_name': None, \n", - " 'mlflow_run_name': None, 'model_package_group_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/ex \n", - " ample-name-aovqo', 'source_model_package_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin \n", - " etuned-models-gamma/28', 'base_model_arn': \n", - " 'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/ \n", - " Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0', \n", - " 's3_output_path': 's3://mufi-test-serverless-smtj/eval/', \n", - " 'dataset_artifact_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b \n", - " 3138877d772ec489bef', 'action_arn_prefix': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:action', \n", - " 'dataset_uri': \n", - " 's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19 \n", - " 5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl', 'task': \n", - " 'gen_qa', 'strategy': 'gen_qa', 'evaluation_metric': 'all', \n", - " 'subtask': '', 'pipeline_name': \n", - " 'SagemakerEvaluation-Deterministic', 'evaluate_base_model': \n", - " True, 'max_new_tokens': '8192', 'temperature': '0', 'top_k': \n", - " '-1', 'top_p': '1.0', 'aggregation': '', 'postprocessing': \n", - " 'False', 'max_model_len': '12000'} \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved template parameters: \u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'role_arn'\u001b[0m: 
\u001b]8;id=970601;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=386360;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#693\u001b\\\u001b[2m693\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:iam::052150106756:role/Admin'\u001b[0m, \u001b[38;2;0;135;0m'mlflow_resource_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m/mmlu-eval-experiment'\u001b[0m, \u001b[38;2;0;135;0m'mlflow_experiment_name'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'mlflow_run_name'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m, \u001b[38;2;0;135;0m'model_package_group_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/ex\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mample-name-aovqo'\u001b[0m, \u001b[38;2;0;135;0m'source_model_package_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28'\u001b[0m, \u001b[38;2;0;135;0m'base_model_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0'\u001b[0m, \u001b[2m \u001b[0m\n", - 
"\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3_output_path'\u001b[0m: \u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'dataset_artifact_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m3138877d772ec489bef'\u001b[0m, \u001b[38;2;0;135;0m'action_arn_prefix'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:action'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'dataset_uri'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl'\u001b[0m, \u001b[38;2;0;135;0m'task'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'gen_qa'\u001b[0m, \u001b[38;2;0;135;0m'strategy'\u001b[0m: \u001b[38;2;0;135;0m'gen_qa'\u001b[0m, \u001b[38;2;0;135;0m'evaluation_metric'\u001b[0m: \u001b[38;2;0;135;0m'all'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'subtask'\u001b[0m: \u001b[38;2;0;135;0m''\u001b[0m, \u001b[38;2;0;135;0m'pipeline_name'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'SagemakerEvaluation-Deterministic'\u001b[0m, \u001b[38;2;0;135;0m'evaluate_base_model'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[3;38;2;0;135;0mTrue\u001b[0m, \u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b[38;2;0;135;0m'8192'\u001b[0m, \u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m, \u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m 
\u001b[0m \u001b[38;2;0;135;0m'-1'\u001b[0m, \u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[38;2;0;135;0m'1.0'\u001b[0m, \u001b[38;2;0;135;0m'aggregation'\u001b[0m: \u001b[38;2;0;135;0m''\u001b[0m, \u001b[38;2;0;135;0m'postprocessing'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'False'\u001b[0m, \u001b[38;2;0;135;0m'max_model_len'\u001b[0m: \u001b[38;2;0;135;0m'12000'\u001b[0m\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Rendered pipeline definition: base_evaluator.py:702\n", - " { \n", - " \"Version\": \"2020-12-01\", \n", - " \"Metadata\": {}, \n", - " \"MlflowConfig\": { \n", - " \"MlflowResourceArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server \n", - " /mmlu-eval-experiment\" \n", - " }, \n", - " \"Parameters\": [], \n", - " \"Steps\": [ \n", - " { \n", - " \"Name\": \"CreateEvaluationAction\", \n", - " \"Type\": \"Lineage\", \n", - " \"Arguments\": { \n", - " \"Actions\": [ \n", - " { \n", - " \"ActionName\": { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"ActionType\": \"Evaluation\", \n", - " \"Source\": { \n", - " \"SourceUri\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin \n", - " etuned-models-gamma/28\", \n", - " \"SourceType\": \"ModelPackage\" \n", - " }, \n", - " \"Properties\": { \n", - " \"PipelineExecutionArn\": { \n", - " \"Get\": \"Execution.PipelineExecutionArn\" \n", - " }, \n", - " \"PipelineName\": \n", - " \"SagemakerEvaluation-Deterministic\" \n", - " } \n", - " } \n", - " ], \n", - " \"Contexts\": [ \n", - " { \n", - " \"ContextName\": { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"ContextType\": \"PipelineExecution\", \n", - " \"Source\": { \n", - " \"SourceUri\": { \n", - " \"Get\": \"Execution.PipelineExecutionArn\" \n", - " } \n", - " } \n", - " } \n", - " ], \n", - " \"Associations\": [ \n", - " { \n", - " \"Source\": { \n", - " \"Name\": { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"Type\": \"Action\" \n", - " }, \n", - " \"Destination\": { \n", - " \"Name\": { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"Type\": \"Context\" \n", - " }, \n", - " \"AssociationType\": \"ContributedTo\" \n", - " }, \n", - " { \n", - " \"Source\": { \n", - " \"Arn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b \n", - " 3138877d772ec489bef\" \n", - " }, \n", 
- " \"Destination\": { \n", - " \"Arn\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"/\", \n", - " \"Values\": [ \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:ac \n", - " tion\", \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " } \n", - " ] \n", - " } \n", - " } \n", - " }, \n", - " \"AssociationType\": \"ContributedTo\" \n", - " } \n", - " ] \n", - " } \n", - " }, \n", - " { \n", - " \"Name\": \"EvaluateBaseModel\", \n", - " \"Type\": \"Training\", \n", - " \"Arguments\": { \n", - " \"RoleArn\": \"arn:aws:iam::052150106756:role/Admin\", \n", - " \"ModelPackageConfig\": { \n", - " \"ModelPackageGroupArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/ex \n", - " ample-name-aovqo\", \n", - " \"SourceModelPackageArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin \n", - " etuned-models-gamma/28\" \n", - " }, \n", - " \"ServerlessJobConfig\": { \n", - " \"BaseModelArn\": \n", - " \"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/ \n", - " Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\", \n", - " \"AcceptEula\": true, \n", - " \"JobType\": \"Evaluation\", \n", - " \"EvaluationType\": \"BenchmarkEvaluation\" \n", - " }, \n", - " \"StoppingCondition\": { \n", - " \"MaxRuntimeInSeconds\": 86400 \n", - " }, \n", - " \"HyperParameters\": { \n", - " \"task\": \"gen_qa\", \n", - " \"strategy\": \"gen_qa\", \n", - " \"evaluation_metric\": \"all\", \n", - " \"max_new_tokens\": \"8192\", \n", - " \"temperature\": \"0\", \n", - " \"top_k\": \"-1\", \n", - " \"top_p\": \"1.0\", \n", - " \"max_model_len\": \"12000\", \n", - " \"aggregation\": \"\", \n", - " \"postprocessing\": \"False\" \n", - " }, \n", - " \"OutputDataConfig\": { \n", - " \"S3OutputPath\": \n", - " \"s3://mufi-test-serverless-smtj/eval/\", \n", - " \"CompressionType\": \"NONE\" \n", - " }, \n", - " \"InputDataConfig\": [ \n", - " { \n", - " \"ChannelName\": \"train\", \n", - " \"DataSource\": { \n", - " 
\"S3DataSource\": { \n", - " \"S3DataType\": \"S3Prefix\", \n", - " \"S3Uri\": \n", - " \"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19 \n", - " 5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\" \n", - " } \n", - " } \n", - " } \n", - " ] \n", - " } \n", - " }, \n", - " { \n", - " \"Name\": \"EvaluateCustomModel\", \n", - " \"Type\": \"Training\", \n", - " \"Arguments\": { \n", - " \"RoleArn\": \"arn:aws:iam::052150106756:role/Admin\", \n", - " \"ModelPackageConfig\": { \n", - " \"ModelPackageGroupArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/ex \n", - " ample-name-aovqo\", \n", - " \"SourceModelPackageArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin \n", - " etuned-models-gamma/28\" \n", - " }, \n", - " \"ServerlessJobConfig\": { \n", - " \"BaseModelArn\": \n", - " \"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/ \n", - " Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\", \n", - " \"AcceptEula\": true, \n", - " \"JobType\": \"Evaluation\", \n", - " \"EvaluationType\": \"BenchmarkEvaluation\" \n", - " }, \n", - " \"StoppingCondition\": { \n", - " \"MaxRuntimeInSeconds\": 86400 \n", - " }, \n", - " \"HyperParameters\": { \n", - " \"task\": \"gen_qa\", \n", - " \"strategy\": \"gen_qa\", \n", - " \"evaluation_metric\": \"all\", \n", - " \"max_new_tokens\": \"8192\", \n", - " \"temperature\": \"0\", \n", - " \"top_k\": \"-1\", \n", - " \"top_p\": \"1.0\", \n", - " \"max_model_len\": \"12000\", \n", - " \"aggregation\": \"\", \n", - " \"postprocessing\": \"False\" \n", - " }, \n", - " \"OutputDataConfig\": { \n", - " \"S3OutputPath\": \n", - " \"s3://mufi-test-serverless-smtj/eval/\", \n", - " \"CompressionType\": \"NONE\" \n", - " }, \n", - " \"InputDataConfig\": [ \n", - " { \n", - " \"ChannelName\": \"train\", \n", - " \"DataSource\": { \n", - " \"S3DataSource\": { \n", - " \"S3DataType\": \"S3Prefix\", \n", - " \"S3Uri\": \n", - " 
\"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19 \n", - " 5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\" \n", - " } \n", - " } \n", - " } \n", - " ] \n", - " } \n", - " }, \n", - " { \n", - " \"Name\": \"AssociateLineage\", \n", - " \"Type\": \"Lineage\", \n", - " \"DependsOn\": [ \n", - " \"CreateEvaluationAction\" \n", - " ], \n", - " \"Arguments\": { \n", - " \"Artifacts\": [ \n", - " { \n", - " \"ArtifactName\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"-\", \n", - " \"Values\": [ \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"base-eval-report\" \n", - " ] \n", - " } \n", - " }, \n", - " \"ArtifactType\": \"EvaluationReport\", \n", - " \"Source\": { \n", - " \"SourceUri\": { \n", - " \"Get\": \n", - " \"Steps.EvaluateBaseModel.OutputDataConfig.S3OutputPath\" \n", - " } \n", - " } \n", - " }, \n", - " { \n", - " \"ArtifactName\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"-\", \n", - " \"Values\": [ \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"custom-eval-report\" \n", - " ] \n", - " } \n", - " }, \n", - " \"ArtifactType\": \"EvaluationReport\", \n", - " \"Source\": { \n", - " \"SourceUri\": { \n", - " \"Get\": \n", - " \"Steps.EvaluateCustomModel.OutputDataConfig.S3OutputPath\" \n", - " } \n", - " } \n", - " } \n", - " ], \n", - " \"Associations\": [ \n", - " { \n", - " \"Source\": { \n", - " \"Name\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"-\", \n", - " \"Values\": [ \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"base-eval-report\" \n", - " ] \n", - " } \n", - " }, \n", - " \"Type\": \"Artifact\" \n", - " }, \n", - " \"Destination\": { \n", - " \"Arn\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"/\", \n", - " \"Values\": [ \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:ac \n", - " tion\", \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " } \n", - " ] \n", - " } \n", - " } 
\n", - " }, \n", - " \"AssociationType\": \"ContributedTo\" \n", - " }, \n", - " { \n", - " \"Source\": { \n", - " \"Name\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"-\", \n", - " \"Values\": [ \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"custom-eval-report\" \n", - " ] \n", - " } \n", - " }, \n", - " \"Type\": \"Artifact\" \n", - " }, \n", - " \"Destination\": { \n", - " \"Arn\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"/\", \n", - " \"Values\": [ \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:ac \n", - " tion\", \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " } \n", - " ] \n", - " } \n", - " } \n", - " }, \n", - " \"AssociationType\": \"ContributedTo\" \n", - " } \n", - " ] \n", - " } \n", - " } \n", - " ] \n", - " } \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Rendered pipeline definition: \u001b]8;id=330131;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=262009;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#702\u001b\\\u001b[2m702\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Version\"\u001b[0m: \u001b[38;2;0;135;0m\"2020-12-01\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Metadata\"\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MlflowConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MlflowResourceArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m/mmlu-eval-experiment\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Parameters\"\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Lineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Actions\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ActionName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ActionType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m 
\u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceType\"\u001b[0m: \u001b[38;2;0;135;0m\"ModelPackage\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Properties\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"PipelineExecutionArn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionArn\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"PipelineName\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SagemakerEvaluation-Deterministic\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Contexts\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ContextName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ContextType\"\u001b[0m: \u001b[38;2;0;135;0m\"PipelineExecution\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", 
- "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionArn\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Associations\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Action\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Context\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, 
\u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m3138877d772ec489bef\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluateBaseModel\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Training\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"RoleArn\"\u001b[0m: \u001b[38;2;0;135;0m\"arn:aws:iam::052150106756:role/Admin\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageGroupArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/ex\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mample-name-aovqo\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceModelPackageArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ServerlessJobConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"BaseModelArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AcceptEula\"\u001b[0m: true, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"JobType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluationType\"\u001b[0m: \u001b[38;2;0;135;0m\"BenchmarkEvaluation\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"StoppingCondition\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MaxRuntimeInSeconds\"\u001b[0m: \u001b[1;36m86400\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"HyperParameters\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"task\"\u001b[0m: \u001b[38;2;0;135;0m\"gen_qa\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"strategy\"\u001b[0m: \u001b[38;2;0;135;0m\"gen_qa\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"evaluation_metric\"\u001b[0m: \u001b[38;2;0;135;0m\"all\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"max_new_tokens\"\u001b[0m: \u001b[38;2;0;135;0m\"8192\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"temperature\"\u001b[0m: \u001b[38;2;0;135;0m\"0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_k\"\u001b[0m: \u001b[38;2;0;135;0m\"-1\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_p\"\u001b[0m: \u001b[38;2;0;135;0m\"1.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_model_len\"\u001b[0m: \u001b[38;2;0;135;0m\"12000\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"aggregation\"\u001b[0m: \u001b[38;2;0;135;0m\"\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"postprocessing\"\u001b[0m: \u001b[38;2;0;135;0m\"False\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"OutputDataConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3OutputPath\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CompressionType\"\u001b[0m: \u001b[38;2;0;135;0m\"NONE\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"InputDataConfig\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ChannelName\"\u001b[0m: \u001b[38;2;0;135;0m\"train\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"S3DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataType\"\u001b[0m: \u001b[38;2;0;135;0m\"S3Prefix\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3Uri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluateCustomModel\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Training\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"RoleArn\"\u001b[0m: \u001b[38;2;0;135;0m\"arn:aws:iam::052150106756:role/Admin\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageGroupArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/ex\u001b[0m \u001b[2m 
\u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mample-name-aovqo\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceModelPackageArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ServerlessJobConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"BaseModelArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AcceptEula\"\u001b[0m: true, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"JobType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluationType\"\u001b[0m: \u001b[38;2;0;135;0m\"BenchmarkEvaluation\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"StoppingCondition\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MaxRuntimeInSeconds\"\u001b[0m: \u001b[1;36m86400\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"HyperParameters\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"task\"\u001b[0m: \u001b[38;2;0;135;0m\"gen_qa\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"strategy\"\u001b[0m: \u001b[38;2;0;135;0m\"gen_qa\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"evaluation_metric\"\u001b[0m: \u001b[38;2;0;135;0m\"all\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_new_tokens\"\u001b[0m: \u001b[38;2;0;135;0m\"8192\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"temperature\"\u001b[0m: \u001b[38;2;0;135;0m\"0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_k\"\u001b[0m: \u001b[38;2;0;135;0m\"-1\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_p\"\u001b[0m: \u001b[38;2;0;135;0m\"1.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_model_len\"\u001b[0m: \u001b[38;2;0;135;0m\"12000\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"aggregation\"\u001b[0m: \u001b[38;2;0;135;0m\"\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"postprocessing\"\u001b[0m: \u001b[38;2;0;135;0m\"False\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"OutputDataConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3OutputPath\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CompressionType\"\u001b[0m: \u001b[38;2;0;135;0m\"NONE\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"InputDataConfig\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ChannelName\"\u001b[0m: \u001b[38;2;0;135;0m\"train\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataType\"\u001b[0m: \u001b[38;2;0;135;0m\"S3Prefix\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3Uri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"AssociateLineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Lineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DependsOn\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m 
\u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Artifacts\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"base-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactType\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluationReport\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Steps.EvaluateBaseModel.OutputDataConfig.S3OutputPath\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactType\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluationReport\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Steps.EvaluateCustomModel.OutputDataConfig.S3OutputPath\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Associations\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"base-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Artifact\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Artifact\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m 
\u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Found existing pipeline: execution.py:199\n", - " SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b2 \n", - " 9171c42 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found existing pipeline: \u001b]8;id=588942;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=925025;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#199\u001b\\\u001b[2m199\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-BenchmarkEvaluation-\u001b[93mc344c91d-6f62-4907-85cc-7e6b2\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m9171c42\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Updating pipeline execution.py:202\n", - " SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b2 \n", - " 9171c42 with latest definition \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Updating pipeline \u001b]8;id=746487;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=234699;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#202\u001b\\\u001b[2m202\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-BenchmarkEvaluation-\u001b[93mc344c91d-6f62-4907-85cc-7e6b2\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m9171c42\u001b[0m with latest definition \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Updating pipeline resource. resources.py:30306\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Updating pipeline resource. \u001b]8;id=908194;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=233215;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py#30306\u001b\\\u001b[2m30306\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 13:40:22] INFO Successfully updated pipeline: execution.py:208\n", - " SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b2 \n", - " 9171c42 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:40:22]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Successfully updated pipeline: \u001b]8;id=321336;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=381496;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#208\u001b\\\u001b[2m208\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-BenchmarkEvaluation-\u001b[93mc344c91d-6f62-4907-85cc-7e6b2\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m9171c42\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Starting pipeline execution: gen-qa-eval-demo-1764452422 execution.py:263\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Starting pipeline execution: gen-qa-eval-demo-\u001b[1;36m1764452422\u001b[0m \u001b]8;id=359442;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=958972;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#263\u001b\\\u001b[2m263\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Pipeline execution started: execution.py:274\n", - " arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \n", - " -BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/9 \n", - " 5qr3e96dblb \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Pipeline execution started: \u001b]8;id=73999;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=223527;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#274\u001b\\\u001b[2m274\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -BenchmarkEvaluation-\u001b[93mc344c91d-6f62-4907-85cc-7e6b29171c42\u001b[0m/execution/9 \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 5qr3e96dblb \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
BenchmarkEvaluationExecution(\n", - "│ arn='arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/95qr3e96dblb',\n", - "│ name='gen-qa-eval-demo',\n", - "│ status=PipelineExecutionStatus(overall_status='Executing', step_details=[], failure_reason=None),\n", - "│ last_modified_time=datetime.datetime(2025, 11, 29, 13, 40, 22, 284000, tzinfo=tzlocal()),\n", - "│ eval_type=<EvalType.BENCHMARK: 'benchmark'>,\n", - "│ s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n", - "│ steps=[]\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mBenchmarkEvaluationExecution\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/95qr3e96dblb'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'gen-qa-eval-demo'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m, \u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mlast_modified_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m29\u001b[0m, \u001b[1;36m13\u001b[0m, \u001b[1;36m40\u001b[0m, \u001b[1;36m22\u001b[0m, \u001b[1;36m284000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - 
"\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0meval_type\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225mEvalType.BENCHMARK:\u001b[0m\u001b[39m \u001b[0m\u001b[38;2;0;135;0m'benchmark'\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m=\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msteps\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Pipeline Execution ARN: arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/95qr3e96dblb\n", - "Initial Status: Executing\n" - ] - } - ], + "outputs": [], "source": [ "# Run evaluation with configured parameters\n", "execution = evaluator.evaluate()\n", @@ -1739,92 +231,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
PipelineExecutionStatus(\n", - "│ overall_status='Executing',\n", - "│ step_details=[\n", - "│ │ StepDetail(\n", - "│ │ │ name='EvaluateCustomModel',\n", - "│ │ │ status='Executing',\n", - "│ │ │ start_time='2025-11-29T13:26:38.084000-08:00',\n", - "│ │ │ end_time='<sagemaker.core.utils.utils.Unassigned object at 0x120de0b60>',\n", - "│ │ │ display_name=None,\n", - "│ │ │ failure_reason=None\n", - "│ │ ),\n", - "│ │ StepDetail(\n", - "│ │ │ name='EvaluateBaseModel',\n", - "│ │ │ status='Executing',\n", - "│ │ │ start_time='2025-11-29T13:26:38.083000-08:00',\n", - "│ │ │ end_time='<sagemaker.core.utils.utils.Unassigned object at 0x120de0b60>',\n", - "│ │ │ display_name=None,\n", - "│ │ │ failure_reason=None\n", - "│ │ ),\n", - "│ │ StepDetail(\n", - "│ │ │ name='CreateEvaluationAction',\n", - "│ │ │ status='Succeeded',\n", - "│ │ │ start_time='2025-11-29T13:26:38.083000-08:00',\n", - "│ │ │ end_time='2025-11-29T13:26:42.759000-08:00',\n", - "│ │ │ display_name=None,\n", - "│ │ │ failure_reason=None\n", - "│ │ )\n", - "│ ],\n", - "│ failure_reason=None\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'EvaluateCustomModel'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T13:26:38.084000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ 
\u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'\u001b[0m\u001b[1;38;2;0;135;0m<\u001b[0m\u001b[1;38;2;0;135;0msagemaker.core.utils.utils.Unassigned\u001b[0m\u001b[38;2;0;135;0m object at 0x120de0b60>'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1;39m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'EvaluateBaseModel'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'Executing'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'2025-11-29T13:26:38.083000-08:00'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'
╭─────────────────────────────────────────── Pipeline Execution Status ───────────────────────────────────────────╮\n", - "│ Overall Status Succeeded │\n", - "│ Target Status Succeeded │\n", - "│ Elapsed Time 0.5s │\n", - "│ │\n", - "│ Pipeline Steps │\n", - "│ Step Name Status Duration │\n", - "│ AssociateLineage Succeeded 3.3s │\n", - "│ EvaluateCustomModel Succeeded 3714.0s │\n", - "│ EvaluateBaseModel Succeeded 5366.2s │\n", - "│ CreateEvaluationAction Succeeded 2.7s │\n", - "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001b[34m╭─\u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34mPipeline Execution Status\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mOverall Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;37mSucceeded\u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mTarget Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;37mSucceeded\u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mElapsed Time \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[37m0.5s \u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35mPipeline Steps\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mStep Name \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35mStatus \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35mDuration \u001b[0m\u001b[1;35m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mAssociateLineage \u001b[0m\u001b[36m \u001b[0m\u001b[33m 
\u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m3.3s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mEvaluateCustomModel \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m3714.0s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mEvaluateBaseModel \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m5366.2s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mCreateEvaluationAction \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m2.7s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:21:20] INFO Final Resource Status: Succeeded execution.py:979\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:21:20]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Final Resource Status: Succeeded \u001b]8;id=401306;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=749;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#979\u001b\\\u001b[2m979\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Final Status: Succeeded\n" - ] - } - ], + "outputs": [], "source": [ "# Wait for job completion with progress updates\n", "# This will show a rich progress display in Jupyter\n", @@ -1956,343 +308,9 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
's3://mufi-test-serverless-smtj/eval/'\n",
- "\n"
- ],
- "text/plain": [
- "\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "[11/29/25 16:21:25] INFO S3 bucket: mufi-test-serverless-smtj, prefix: eval show_results_utils.py:130\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:21:25]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m S3 bucket: mufi-test-serverless-smtj, prefix: eval \u001b]8;id=671086;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=908024;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#130\u001b\\\u001b[2m130\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Extracted training job name: show_results_utils.py:63\n", - " pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7 from \n", - " step: EvaluateCustomModel \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted training job name: \u001b]8;id=813615;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=57499;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#63\u001b\\\u001b[2m63\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7 from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m step: EvaluateCustomModel \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:21:26] INFO Extracted training job name: show_results_utils.py:63\n", - " pipelines-95qr3e96dblb-EvaluateBaseModel-VA9YzcdIVI from \n", - " step: EvaluateBaseModel \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:21:26]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted training job name: \u001b]8;id=745707;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=953308;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#63\u001b\\\u001b[2m63\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-95qr3e96dblb-EvaluateBaseModel-VA9YzcdIVI from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m step: EvaluateBaseModel \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Searching for results_*.json in show_results_utils.py:150\n", - " s3://mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-E \n", - " valuateCustomModel-F51y8F3Pg7/output/output/ \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for results_*.json in \u001b]8;id=805603;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=739949;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-E\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mvaluateCustomModel-F51y8F3Pg7/output/output/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Found results file: show_results_utils.py:168\n", - " eval/pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7/o \n", - " utput/output/eval-meta_textgeneration_llama_3_2_1b_instruct- \n", - " -or8pa/eval_results/results_2025-11-29T22-41-53.186048+00-00 \n", - " .json \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found results file: \u001b]8;id=188825;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=667854;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#168\u001b\\\u001b[2m168\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m eval/pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7/o \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m utput/output/eval-meta_textgeneration_llama_3_2_1b_instruct- \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -or8pa/eval_results/results_2025-\u001b[1;36m11\u001b[0m-29T22-\u001b[1;36m41\u001b[0m-\u001b[1;36m53.186048\u001b[0m+\u001b[1;36m00-00\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1;36m.j\u001b[0mson \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Searching for results_*.json in show_results_utils.py:150\n", - " s3://mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-E \n", - " valuateBaseModel-VA9YzcdIVI/output/output/ \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for results_*.json in \u001b]8;id=270113;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=844454;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-E\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mvaluateBaseModel-VA9YzcdIVI/output/output/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Found results file: show_results_utils.py:168\n", - " eval/pipelines-95qr3e96dblb-EvaluateBaseModel-VA9YzcdIVI/out \n", - " put/output/eval-meta_textgeneration_llama_3_2_1b_instruct--o \n", - " r8pa/eval_results/results_2025-11-29T23-09-21.277725+00-00.j \n", - " son \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found results file: \u001b]8;id=221667;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=736866;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#168\u001b\\\u001b[2m168\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m eval/pipelines-95qr3e96dblb-EvaluateBaseModel-VA9YzcdIVI/out \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m put/output/eval-meta_textgeneration_llama_3_2_1b_instruct--o \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m r8pa/eval_results/results_2025-\u001b[1;36m11\u001b[0m-29T23-\u001b[1;36m09\u001b[0m-\u001b[1;36m21.277725\u001b[0m+\u001b[1;36m00-00.j\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m son \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Using metrics from 'all' key (standard benchmark format) show_results_utils.py:93\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using metrics from \u001b[38;2;0;135;0m'all'\u001b[0m key \u001b[1m(\u001b[0mstandard benchmark format\u001b[1m)\u001b[0m \u001b]8;id=431825;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=75452;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#93\u001b\\\u001b[2m93\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Using metrics from 'all' key (standard benchmark format) show_results_utils.py:93\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using metrics from \u001b[38;2;0;135;0m'all'\u001b[0m key \u001b[1m(\u001b[0mstandard benchmark format\u001b[1m)\u001b[0m \u001b]8;id=866976;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=697222;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#93\u001b\\\u001b[2m93\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Custom Model Results \n", - "╭────────────────────────────────┬─────────────────╮\n", - "│ Metric │ Value │\n", - "├────────────────────────────────┼─────────────────┤\n", - "│ bleu │ 6.6928 │\n", - "│ bleu_stderr │ 0.7801 │\n", - "│ em │ 1.23% │\n", - "│ em_stderr │ 0.0018 │\n", - "│ f1 │ 19.04% │\n", - "│ f1_score_quasi │ 25.25% │\n", - "│ f1_score_quasi_stderr │ 0.0049 │\n", - "│ f1_stderr │ 0.0047 │\n", - "│ qem │ 2.16% │\n", - "│ qem_stderr │ 0.0024 │\n", - "│ rouge1 │ 25.69% │\n", - "│ rouge1_stderr │ 0.0047 │\n", - "│ rouge2 │ 19.09% │\n", - "│ rouge2_stderr │ 0.0047 │\n", - "│ rougeL │ 25.02% │\n", - "│ rougeL_stderr │ 0.0047 │\n", - "╰────────────────────────────────┴─────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3;32mCustom Model Results\u001b[0m\u001b[3m \u001b[0m\n", - "╭────────────────────────────────┬─────────────────╮\n", - "│\u001b[1;32m \u001b[0m\u001b[1;32mMetric \u001b[0m\u001b[1;32m \u001b[0m│\u001b[1;32m \u001b[0m\u001b[1;32m Value\u001b[0m\u001b[1;32m \u001b[0m│\n", - "├────────────────────────────────┼─────────────────┤\n", - "│\u001b[36m \u001b[0m\u001b[36mbleu \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 6.6928\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mbleu_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.7801\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mem \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 1.23%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mem_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0018\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 19.04%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_score_quasi \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.25%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m 
\u001b[0m\u001b[36mf1_score_quasi_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0049\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mqem \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 2.16%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mqem_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0024\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge1 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.69%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge1_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge2 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 19.09%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge2_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrougeL \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.02%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrougeL_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "╰────────────────────────────────┴─────────────────╯\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Base Model Results \n", - "╭────────────────────────────────┬─────────────────╮\n", - "│ Metric │ Value │\n", - "├────────────────────────────────┼─────────────────┤\n", - "│ bleu │ 6.6928 │\n", - "│ bleu_stderr │ 0.7803 │\n", - "│ em │ 1.29% │\n", - "│ em_stderr │ 0.0019 │\n", - "│ f1 │ 19.09% │\n", - "│ f1_score_quasi │ 25.22% │\n", - "│ f1_score_quasi_stderr │ 0.0049 │\n", - "│ f1_stderr │ 0.0047 │\n", - "│ qem │ 2.18% │\n", - "│ qem_stderr │ 0.0024 │\n", - "│ rouge1 │ 25.61% │\n", - "│ rouge1_stderr │ 0.0047 │\n", - "│ rouge2 │ 19.04% │\n", - "│ rouge2_stderr │ 0.0047 │\n", - "│ rougeL │ 24.95% │\n", - "│ rougeL_stderr │ 0.0047 │\n", - "╰────────────────────────────────┴─────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3;33mBase Model Results\u001b[0m\u001b[3m \u001b[0m\n", - "╭────────────────────────────────┬─────────────────╮\n", - "│\u001b[1;33m \u001b[0m\u001b[1;33mMetric \u001b[0m\u001b[1;33m \u001b[0m│\u001b[1;33m \u001b[0m\u001b[1;33m Value\u001b[0m\u001b[1;33m \u001b[0m│\n", - "├────────────────────────────────┼─────────────────┤\n", - "│\u001b[36m \u001b[0m\u001b[36mbleu \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 6.6928\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mbleu_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.7803\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mem \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 1.29%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mem_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0019\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 19.09%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_score_quasi \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.22%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m 
\u001b[0m\u001b[36mf1_score_quasi_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0049\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mqem \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 2.18%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mqem_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0024\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge1 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.61%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge1_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge2 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 19.04%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge2_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrougeL \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 24.95%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrougeL_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "╰────────────────────────────────┴─────────────────╯\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
╭─────────────────────────────────────────── Result Artifacts Location ───────────────────────────────────────────╮\n", - "│ │\n", - "│ │\n", - "│ 📦 Full evaluation artifacts available at: │\n", - "│ │\n", - "│ Custom Model: │\n", - "│ s3://mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7/output/output/Non │\n", - "│ e/eval_results/ │\n", - "│ │\n", - "│ Base Model: │\n", - "│ s3://mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-EvaluateBaseModel-VA9YzcdIVI/output/output/None/ │\n", - "│ eval_results/ │\n", - "│ │\n", - "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001b[34m╭─\u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34mResult Artifacts Location\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m📦 \u001b[0m\u001b[1mFull evaluation artifacts available at:\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;32mCustom Model:\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m s3://mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7/output/output/Non\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36me/eval_results/\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;33mBase Model:\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m s3://mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-EvaluateBaseModel-VA9YzcdIVI/output/output/None/\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36meval_results/\u001b[0m \u001b[34m│\u001b[0m\n", - 
"\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "pprint(execution.s3_output_path)\n", "# Display results in a formatted table\n", @@ -2312,197 +330,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:35:47] INFO Extracted s3_output_path from training job execution.py:367\n", - " pipelines-inlsexrd7jes-EvaluateCustomModel-NuPrIoRW4Q: \n", - " s3://mufi-test-serverless-smtj/eval/ \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:35:47]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted s3_output_path from training job \u001b]8;id=148252;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=588100;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#367\u001b\\\u001b[2m367\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-inlsexrd7jes-EvaluateCustomModel-NuPrIoRW4Q: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
BenchmarkEvaluationExecution(\n", - "│ arn='arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/inlsexrd7jes',\n", - "│ name='inlsexrd7jes',\n", - "│ status=PipelineExecutionStatus(\n", - "│ │ overall_status='Executing',\n", - "│ │ step_details=[\n", - "│ │ │ StepDetail(\n", - "│ │ │ │ name='EvaluateCustomModel',\n", - "│ │ │ │ status='Executing',\n", - "│ │ │ │ start_time='2025-11-29T13:26:38.084000-08:00',\n", - "│ │ │ │ end_time='<sagemaker.core.utils.utils.Unassigned object at 0x120de0b60>',\n", - "│ │ │ │ display_name=None,\n", - "│ │ │ │ failure_reason=None\n", - "│ │ │ ),\n", - "│ │ │ StepDetail(\n", - "│ │ │ │ name='EvaluateBaseModel',\n", - "│ │ │ │ status='Executing',\n", - "│ │ │ │ start_time='2025-11-29T13:26:38.083000-08:00',\n", - "│ │ │ │ end_time='<sagemaker.core.utils.utils.Unassigned object at 0x120de0b60>',\n", - "│ │ │ │ display_name=None,\n", - "│ │ │ │ failure_reason=None\n", - "│ │ │ ),\n", - "│ │ │ StepDetail(\n", - "│ │ │ │ name='CreateEvaluationAction',\n", - "│ │ │ │ status='Succeeded',\n", - "│ │ │ │ start_time='2025-11-29T13:26:38.083000-08:00',\n", - "│ │ │ │ end_time='2025-11-29T13:26:42.759000-08:00',\n", - "│ │ │ │ display_name=None,\n", - "│ │ │ │ failure_reason=None\n", - "│ │ │ )\n", - "│ │ ],\n", - "│ │ failure_reason=None\n", - "│ ),\n", - "│ last_modified_time=datetime.datetime(2025, 11, 29, 13, 26, 37, 300000, tzinfo=tzlocal()),\n", - "│ eval_type=<EvalType.BENCHMARK: 'benchmark'>,\n", - "│ s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n", - "│ steps=[]\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mBenchmarkEvaluationExecution\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/inlsexrd7jes'\u001b[0m,\n", - 
"\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'inlsexrd7jes'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'EvaluateCustomModel'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T13:26:38.084000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'\u001b[0m\u001b[1;38;2;0;135;0m<\u001b[0m\u001b[1;38;2;0;135;0msagemaker.core.utils.utils.Unassigned\u001b[0m\u001b[38;2;0;135;0m object at 0x120de0b60>'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1;39m(\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'EvaluateBaseModel'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ 
\u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'Executing'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'2025-11-29T13:26:38.083000-08:00'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n", - "│ in <module>:22 │\n", - "│ │\n", - "│ 19 pprint(existing_execution) │\n", - "│ 20 print(f\"\\nStatus: {existing_execution.status.overall_status}\") │\n", - "│ 21 │\n", - "│ ❱ 22 existing_execution.show_results() │\n", - "│ 23 │\n", - "│ │\n", - "│ /Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/tele │\n", - "│ metry_logging.py:175 in wrapper │\n", - "│ │\n", - "│ 172 │ │ │ │ │ \"sagemaker_session is not provided or not valid.\", │\n", - "│ 173 │ │ │ │ │ func_name, │\n", - "│ 174 │ │ │ │ ) │\n", - "│ ❱ 175 │ │ │ │ return func(*args, **kwargs) │\n", - "│ 176 │ │ │\n", - "│ 177 │ │ return wrapper │\n", - "│ 178 │\n", - "│ │\n", - "│ /Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/exe │\n", - "│ cution.py:1223 in show_results │\n", - "│ │\n", - "│ 1220 │ │ self.refresh() │\n", - "│ 1221 │ │ │\n", - "│ 1222 │ │ if self.status.overall_status != \"Succeeded\": │\n", - "│ ❱ 1223 │ │ │ raise ValueError( │\n", - "│ 1224 │ │ │ │ f\"Cannot show results. Execution status is '{self.status.overall_status} │\n", - "│ 1225 │ │ │ │ f\"Results are only available after successful execution. \" │\n", - "│ 1226 │ │ │ │ f\"Use execution.wait() to wait for completion or check execution.status │\n", - "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n", - "ValueError: Cannot show results. Execution status is 'Executing'. Results are only available after successful \n", - "execution. 
Use execution.wait() to wait for completion or check execution.status for details.\n", - "\n" - ], - "text/plain": [ - "\u001b[38;2;255;0;0m╭─\u001b[0m\u001b[38;2;255;0;0m──────────────────────────────\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[1;38;2;255;0;0mTraceback \u001b[0m\u001b[1;2;38;2;255;0;0m(most recent call last)\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[38;2;255;0;0m───────────────────────────────\u001b[0m\u001b[38;2;255;0;0m─╮\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m in \u001b[92m
[11/22/25 12:24:36] INFO Updating pipeline resource. resources.py:30485\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/22/25 12:24:36]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Updating pipeline resource. \u001b]8;id=707103;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=260368;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/resources.py#30485\u001b\\\u001b[2m30485\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO - sagemaker_core.main.resources - Updating pipeline resource.\n", - "INFO - sagemaker.modules.evaluate.execution - Successfully updated pipeline: SagemakerEvaluation-benchmark\n", - "INFO - sagemaker.modules.evaluate.execution - Starting pipeline execution: gen-qa-eval-demo-1763843077\n", - "INFO - sagemaker.modules.evaluate.execution - Pipeline execution started: arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-benchmark/execution/gv93gtwgr7w8\n" - ] - }, - { - "data": { - "text/html": [ - "
BenchmarkEvaluationExecution(\n", - "│ arn='arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-benchmark/execution/gv93gtwgr7w8',\n", - "│ name='gen-qa-eval-demo',\n", - "│ status=PipelineExecutionStatus(overall_status='Executing', step_details=[], failure_reason=None),\n", - "│ last_modified_time=datetime.datetime(2025, 11, 22, 12, 24, 37, 828000, tzinfo=tzlocal()),\n", - "│ eval_type=<EvalType.BENCHMARK: 'benchmark'>,\n", - "│ s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n", - "│ steps=[]\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mBenchmarkEvaluationExecution\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-benchmark/execution/gv93gtwgr7w8'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'gen-qa-eval-demo'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m, \u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mlast_modified_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m22\u001b[0m, \u001b[1;36m12\u001b[0m, \u001b[1;36m24\u001b[0m, \u001b[1;36m37\u001b[0m, \u001b[1;36m828000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ 
\u001b[0m\u001b[38;2;215;175;0meval_type\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225mEvalType.BENCHMARK:\u001b[0m\u001b[39m \u001b[0m\u001b[38;2;0;135;0m'benchmark'\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m=\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msteps\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Pipeline Execution ARN: arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-benchmark/execution/gv93gtwgr7w8\n", - "Initial Status: Executing\n" - ] - } - ], + "outputs": [], "source": [ "# Run evaluation with configured parameters\n", "execution = evaluator.evaluate()\n", @@ -2629,52 +380,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:41:19] INFO Extracted s3_output_path from training job execution.py:367\n", - " pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7: \n", - " s3://mufi-test-serverless-smtj/eval/ \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:41:19]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted s3_output_path from training job \u001b]8;id=166943;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=816278;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#367\u001b\\\u001b[2m367\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Extracted s3_output_path from training job execution.py:367\n", - " pipelines-inlsexrd7jes-EvaluateCustomModel-NuPrIoRW4Q: \n", - " s3://mufi-test-serverless-smtj/eval/ \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted s3_output_path from training job \u001b]8;id=521868;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=351282;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#367\u001b\\\u001b[2m367\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-inlsexrd7jes-EvaluateCustomModel-NuPrIoRW4Q: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found 2 evaluation(s)\n", - "\n", - " 95qr3e96dblb: Executing\n", - " inlsexrd7jes: Executing\n" - ] - } - ], + "outputs": [], "source": [ "# Get all benchmark evaluations (returns iterator)\n", "all_executions_iter = BenchMarkEvaluator.get_all(region=\"us-west-2\")\n", @@ -2698,66 +404,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/shapes.py:2350: UserWarning: Field name \"schema\" in \"AutoMLSnowflakeDatasetDefinition\" shadows an attribute in parent \"Base\"\n", - " class AutoMLSnowflakeDatasetDefinition(Base):\n", - "/Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/shapes.py:6372: UserWarning: Field name \"schema\" 
in \"SnowflakeDatasetDefinition\" shadows an attribute in parent \"Base\"\n", - " class SnowflakeDatasetDefinition(Base):\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /Users/mufi/Library/Application Support/sagemaker/config.yaml\n" - ] - }, - { - "data": { - "text/html": [ - "
[11/22/25 18:32:01] WARNING No boto3 session provided. Creating a new session. utils.py:339\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/22/25 18:32:01]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m No boto3 session provided. Creating a new session. \u001b]8;id=549422;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=573139;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/utils.py#339\u001b\\\u001b[2m339\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
WARNING No config provided. Using default config. utils.py:347\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m No config provided. Using default config. \u001b]8;id=278829;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=978800;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/utils.py#347\u001b\\\u001b[2m347\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Succeeded\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "AWS service error when stopping pipeline execution: Pipeline execution with ARN arn:aws:sagemaker:us-west-2:052150106756:pipeline/sagemakerevaluation-benchmark/execution/7rr30o7c2qfb status 'Succeeded'. 
Only pipelines with 'Executing' status can be stopped.\n" - ] - } - ], + "outputs": [], "source": [ "# Uncomment to stop the job\n", "# existing_execution.stop()\n", diff --git a/sagemaker-train/example_notebooks/evaluate/custom_scorer_demo.ipynb b/sagemaker-train/example_notebooks/evaluate/custom_scorer_demo.ipynb index 6cf049cb79..c980e42705 100644 --- a/sagemaker-train/example_notebooks/evaluate/custom_scorer_demo.ipynb +++ b/sagemaker-train/example_notebooks/evaluate/custom_scorer_demo.ipynb @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -48,19 +48,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Configuration:\n", - " Evaluator: arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-test/0.0.1\n", - " Dataset: s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\n", - " Base Model: arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28\n", - " Output Location: s3://mufi-test-serverless-smtj/eval/\n" - ] - } - ], + "outputs": [], "source": [ "# Evaluator ARN (custom evaluator from AI Registry)\n", "# evaluator_arn = \"arn:aws:sagemaker:us-west-2:052150106756:hub-content/AIRegistry/JsonDoc/00-goga-qa-evaluation/1.0.0\"\n", @@ -103,99 +91,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:42:33] INFO Found credentials in shared credentials file: ~/.aws/credentials credentials.py:1364\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:42:33]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=639873;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=963387;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/botocore/credentials.py#1364\u001b\\\u001b[2m1364\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /Users/mufi/Library/Application Support/sagemaker/config.yaml\n" - ] - }, - { - "data": { - "text/html": [ - "
INFO Resolved MLflow resource ARN: base_evaluator.py:113\n", - " arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/ \n", - " mmlu-eval-experiment \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved MLflow resource ARN: \u001b]8;id=342593;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=318918;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#113\u001b\\\u001b[2m113\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/ \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m mmlu-eval-experiment \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "✓ CustomScorerEvaluator created successfully\n" - ] - }, - { - "data": { - "text/html": [ - "
CustomScorerEvaluator(\n", - "│ region=None,\n", - "│ sagemaker_session=<sagemaker.core.helper.session_helper.Session object at 0x116ae9f40>,\n", - "│ model='arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28',\n", - "│ base_eval_name='eval-meta-1b49b716',\n", - "│ s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n", - "│ mlflow_resource_arn='arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment',\n", - "│ mlflow_experiment_name=None,\n", - "│ mlflow_run_name=None,\n", - "│ networking=None,\n", - "│ kms_key_id=None,\n", - "│ model_package_group=None,\n", - "│ evaluator='arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-test/0.0.1',\n", - "│ dataset='s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl',\n", - "│ evaluate_base_model=False\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mCustomScorerEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mregion\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msagemaker_session\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225msagemaker.core.helper.session_helper.Session\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x116ae9f40\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbase_eval_name\u001b[0m=\u001b[38;2;0;135;0m'eval-meta-1b49b716'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m=\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m,\n", - "\u001b[2;32m│ 
\u001b[0m\u001b[38;2;215;175;0mmlflow_resource_arn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_experiment_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_run_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mnetworking\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mkms_key_id\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel_package_group\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mevaluator\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-test/0.0.1'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mdataset\u001b[0m=\u001b[38;2;0;135;0m's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mevaluate_base_model\u001b[0m=\u001b[3;38;2;215;0;0mFalse\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Create evaluator with custom evaluator ARN\n", "evaluator = CustomScorerEvaluator(\n", @@ -221,156 +119,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:42:38] INFO SageMaker Python SDK will collect telemetry to help us better telemetry_logging.py:91\n", - " understand our user's needs, diagnose issues, and deliver \n", - " additional features. \n", - " To opt out of telemetry, please disable via TelemetryOptOut \n", - " parameter in SDK defaults config. For more information, refer \n", - " to \n", - " https://sagemaker.readthedocs.io/en/stable/overview.html#confi \n", - " guring-and-using-defaults-with-the-sagemaker-python-sdk. \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:42:38]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=848286;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=998219;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Fetching evaluation override parameters for custom_scorer_evaluator.py:236\n", - " hyperparameters property \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Fetching evaluation override parameters for \u001b]8;id=20210;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py\u001b\\\u001b[2mcustom_scorer_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=113368;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py#236\u001b\\\u001b[2m236\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m hyperparameters property \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Fetching hub content metadata for recipe_utils.py:201\n", - " meta-textgeneration-llama-3-2-1b-instruct from SageMakerPublicHub \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Fetching hub content metadata for \u001b]8;id=402391;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=385188;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#201\u001b\\\u001b[2m201\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct from SageMakerPublicHub \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
WARNING No region provided. Using default region. utils.py:340\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m No region provided. Using default region. \u001b]8;id=442028;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=947914;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py#340\u001b\\\u001b[2m340\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Runs on sagemaker us-west-2, region:us-west-2 utils.py:354\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Runs on sagemaker us-west-\u001b[1;36m2\u001b[0m, region:us-west-\u001b[1;36m2\u001b[0m \u001b]8;id=708289;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=968385;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py#354\u001b\\\u001b[2m354\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Searching for evaluation recipe with Type='Evaluation' and recipe_utils.py:221\n", - " EvaluationType='DeterministicEvaluation' \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for evaluation recipe with \u001b[38;2;215;175;0mType\u001b[0m=\u001b[38;2;0;135;0m'Evaluation'\u001b[0m and \u001b]8;id=711157;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=750371;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#221\u001b\\\u001b[2m221\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;215;175;0mEvaluationType\u001b[0m=\u001b[38;2;0;135;0m'DeterministicEvaluation'\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Downloading override parameters from recipe_utils.py:249\n", - " s3://jumpstart-cache-beta-us-west-2/recipes/open-source-eval-meta- \n", - " textgeneration-llama-3-2-1b-instruct-deterministic_override_params \n", - " _sm_jobs_v1.0.19.json \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Downloading override parameters from \u001b]8;id=762518;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=755839;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#249\u001b\\\u001b[2m249\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/jumpstart-cache-beta-us-west-2/recipes/\u001b[0m\u001b[38;2;225;0;225mopen-source-eval-meta-\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mtextgeneration-llama-3-2-1b-instruct-deterministic_override_params\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225m_sm_jobs_v1.0.19.json\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
{\n", - "│ 'max_new_tokens': '8192',\n", - "│ 'temperature': '0',\n", - "│ 'top_k': '-1',\n", - "│ 'top_p': '1.0',\n", - "│ 'aggregation': '',\n", - "│ 'postprocessing': 'False',\n", - "│ 'max_model_len': '12000'\n", - "}\n", - "\n" - ], - "text/plain": [ - "\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b[38;2;0;135;0m'8192'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[38;2;0;135;0m'-1'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[38;2;0;135;0m'1.0'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'aggregation'\u001b[0m: \u001b[38;2;0;135;0m''\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'postprocessing'\u001b[0m: \u001b[38;2;0;135;0m'False'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'max_model_len'\u001b[0m: \u001b[38;2;0;135;0m'12000'\u001b[0m\n", - "\u001b[1m}\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "pprint(evaluator.hyperparameters.to_dict())\n", "\n", @@ -392,7 +143,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -423,916 +174,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:42:43] INFO SageMaker Python SDK will collect telemetry to help us better telemetry_logging.py:91\n", - " understand our user's needs, diagnose issues, and deliver \n", - " additional features. \n", - " To opt out of telemetry, please disable via TelemetryOptOut \n", - " parameter in SDK defaults config. For more information, refer \n", - " to \n", - " https://sagemaker.readthedocs.io/en/stable/overview.html#confi \n", - " guring-and-using-defaults-with-the-sagemaker-python-sdk. \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:42:43]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=201476;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=125527;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Getting or creating artifact for source: base_evaluator.py:597\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \n", - " tuned-models-gamma/28 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Getting or creating artifact for source: \u001b]8;id=336129;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=429516;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#597\u001b\\\u001b[2m597\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Searching for existing artifact for model package: base_evaluator.py:459\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \n", - " tuned-models-gamma/28 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for existing artifact for model package: \u001b]8;id=916341;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=92767;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#459\u001b\\\u001b[2m459\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Found existing artifact: base_evaluator.py:468\n", - " arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3 \n", - " 138877d772ec489bef \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found existing artifact: \u001b]8;id=110957;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=865654;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#468\u001b\\\u001b[2m468\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3 \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 138877d772ec489bef \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Inferred model package group ARN: base_evaluator.py:386\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes \n", - " t-finetuned-models-gamma from \n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \n", - " tuned-models-gamma/28 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Inferred model package group ARN: \u001b]8;id=126121;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=198580;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#386\u001b\\\u001b[2m386\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m t-finetuned-models-gamma from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Automatically inferred model_package_group: base_evaluator.py:421\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes \n", - " t-finetuned-models-gamma \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Automatically inferred model_package_group: \u001b]8;id=183930;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=417470;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#421\u001b\\\u001b[2m421\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m t-finetuned-models-gamma \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Using ModelPackage - model_package_group_arn: custom_scorer_evaluator.py:421\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package- \n", - " group/test-finetuned-models-gamma \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using ModelPackage - model_package_group_arn: \u001b]8;id=191140;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py\u001b\\\u001b[2mcustom_scorer_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=51752;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py#421\u001b\\\u001b[2m421\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package- \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m group/test-finetuned-models-gamma \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Resolved model info - base_model_name: custom_scorer_evaluator.py:424\n", - " meta-textgeneration-llama-3-2-1b-instruct, \n", - " base_model_arn: \n", - " arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPu \n", - " blicHub/Model/meta-textgeneration-llama-3-2-1b-instruct \n", - " /1.10.0, source_model_package_arn: \n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package/ \n", - " test-finetuned-models-gamma/28 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved model info - base_model_name: \u001b]8;id=359160;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py\u001b\\\u001b[2mcustom_scorer_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=935533;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py#424\u001b\\\u001b[2m424\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m base_model_arn: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPu \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m blicHub/Model/meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m /\u001b[1;36m1.10\u001b[0m.\u001b[1;36m0\u001b[0m, source_model_package_arn: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/ \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m test-finetuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO SageMaker Python SDK will collect telemetry to help us better telemetry_logging.py:91\n", - " understand our user's needs, diagnose issues, and deliver \n", - " additional features. \n", - " To opt out of telemetry, please disable via TelemetryOptOut \n", - " parameter in SDK defaults config. For more information, refer \n", - " to \n", - " https://sagemaker.readthedocs.io/en/stable/overview.html#confi \n", - " guring-and-using-defaults-with-the-sagemaker-python-sdk. \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=189431;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=22751;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Using configured hyperparameters: {'max_new_tokens': custom_scorer_evaluator.py:299\n", - " '8192', 'temperature': '0', 'top_k': '-1', 'top_p': \n", - " '1.0', 'aggregation': '', 'postprocessing': 'False', \n", - " 'max_model_len': '12000'} \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using configured hyperparameters: \u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b]8;id=536279;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py\u001b\\\u001b[2mcustom_scorer_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=194605;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py#299\u001b\\\u001b[2m299\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'8192'\u001b[0m, \u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m, \u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[38;2;0;135;0m'-1'\u001b[0m, \u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'1.0'\u001b[0m, \u001b[38;2;0;135;0m'aggregation'\u001b[0m: \u001b[38;2;0;135;0m''\u001b[0m, \u001b[38;2;0;135;0m'postprocessing'\u001b[0m: \u001b[38;2;0;135;0m'False'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'max_model_len'\u001b[0m: \u001b[38;2;0;135;0m'12000'\u001b[0m\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Using full template for ModelPackage base_evaluator.py:655\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using full template for ModelPackage \u001b]8;id=164880;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=880373;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#655\u001b\\\u001b[2m655\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 13:42:44] INFO Resolved template parameters: {'role_arn': base_evaluator.py:693\n", - " 'arn:aws:iam::052150106756:role/Admin', 'mlflow_resource_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server \n", - " /mmlu-eval-experiment', 'mlflow_experiment_name': None, \n", - " 'mlflow_run_name': None, 'model_package_group_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te \n", - " st-finetuned-models-gamma', 'source_model_package_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin \n", - " etuned-models-gamma/28', 'base_model_arn': \n", - " 'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/ \n", - " Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0', \n", - " 's3_output_path': 's3://mufi-test-serverless-smtj/eval/', \n", - " 'dataset_artifact_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b \n", - " 3138877d772ec489bef', 'action_arn_prefix': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:action', \n", - " 'dataset_uri': \n", - " 's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19 \n", - " 5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl', 'task': \n", - " 'gen_qa', 'strategy': 'gen_qa', 'evaluation_metric': 'all', \n", - " 'pipeline_name': 'SagemakerEvaluation-Deterministic', \n", - " 'evaluate_base_model': False, 'evaluator_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKW \n", - " PZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-t \n", - " est/0.0.1', 'max_new_tokens': '8192', 'temperature': '0', \n", - " 'top_k': '-1', 'top_p': '1.0', 'aggregation': 'mean', \n", - " 'postprocessing': 'True', 'max_model_len': '12000'} \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:42:44]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved template parameters: \u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'role_arn'\u001b[0m: 
\u001b]8;id=863350;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=151185;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#693\u001b\\\u001b[2m693\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:iam::052150106756:role/Admin'\u001b[0m, \u001b[38;2;0;135;0m'mlflow_resource_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m/mmlu-eval-experiment'\u001b[0m, \u001b[38;2;0;135;0m'mlflow_experiment_name'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'mlflow_run_name'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m, \u001b[38;2;0;135;0m'model_package_group_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mst-finetuned-models-gamma'\u001b[0m, \u001b[38;2;0;135;0m'source_model_package_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28'\u001b[0m, \u001b[38;2;0;135;0m'base_model_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0'\u001b[0m, \u001b[2m \u001b[0m\n", - 
"\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3_output_path'\u001b[0m: \u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'dataset_artifact_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m3138877d772ec489bef'\u001b[0m, \u001b[38;2;0;135;0m'action_arn_prefix'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:action'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'dataset_uri'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl'\u001b[0m, \u001b[38;2;0;135;0m'task'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'gen_qa'\u001b[0m, \u001b[38;2;0;135;0m'strategy'\u001b[0m: \u001b[38;2;0;135;0m'gen_qa'\u001b[0m, \u001b[38;2;0;135;0m'evaluation_metric'\u001b[0m: \u001b[38;2;0;135;0m'all'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'pipeline_name'\u001b[0m: \u001b[38;2;0;135;0m'SagemakerEvaluation-Deterministic'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'evaluate_base_model'\u001b[0m: \u001b[3;38;2;215;0;0mFalse\u001b[0m, \u001b[38;2;0;135;0m'evaluator_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKW\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-t\u001b[0m \u001b[2m \u001b[0m\n", - 
"\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mest/0.0.1'\u001b[0m, \u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b[38;2;0;135;0m'8192'\u001b[0m, \u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[38;2;0;135;0m'-1'\u001b[0m, \u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[38;2;0;135;0m'1.0'\u001b[0m, \u001b[38;2;0;135;0m'aggregation'\u001b[0m: \u001b[38;2;0;135;0m'mean'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'postprocessing'\u001b[0m: \u001b[38;2;0;135;0m'True'\u001b[0m, \u001b[38;2;0;135;0m'max_model_len'\u001b[0m: \u001b[38;2;0;135;0m'12000'\u001b[0m\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Rendered pipeline definition: base_evaluator.py:702\n", - " { \n", - " \"Version\": \"2020-12-01\", \n", - " \"Metadata\": {}, \n", - " \"MlflowConfig\": { \n", - " \"MlflowResourceArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server \n", - " /mmlu-eval-experiment\" \n", - " }, \n", - " \"Parameters\": [], \n", - " \"Steps\": [ \n", - " { \n", - " \"Name\": \"CreateEvaluationAction\", \n", - " \"Type\": \"Lineage\", \n", - " \"Arguments\": { \n", - " \"Actions\": [ \n", - " { \n", - " \"ActionName\": { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"ActionType\": \"Evaluation\", \n", - " \"Source\": { \n", - " \"SourceUri\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin \n", - " etuned-models-gamma/28\", \n", - " \"SourceType\": \"ModelPackage\" \n", - " }, \n", - " \"Properties\": { \n", - " \"PipelineExecutionArn\": { \n", - " \"Get\": \"Execution.PipelineExecutionArn\" \n", - " }, \n", - " \"PipelineName\": \n", - " \"SagemakerEvaluation-Deterministic\" \n", - " } \n", - " } \n", - " ], \n", - " \"Contexts\": [ \n", - " { \n", - " \"ContextName\": { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"ContextType\": \"PipelineExecution\", \n", - " \"Source\": { \n", - " \"SourceUri\": { \n", - " \"Get\": \"Execution.PipelineExecutionArn\" \n", - " } \n", - " } \n", - " } \n", - " ], \n", - " \"Associations\": [ \n", - " { \n", - " \"Source\": { \n", - " \"Name\": { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"Type\": \"Action\" \n", - " }, \n", - " \"Destination\": { \n", - " \"Name\": { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"Type\": \"Context\" \n", - " }, \n", - " \"AssociationType\": \"ContributedTo\" \n", - " }, \n", - " { \n", - " \"Source\": { \n", - " \"Arn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b \n", - " 3138877d772ec489bef\" \n", - " }, \n", 
- " \"Destination\": { \n", - " \"Arn\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"/\", \n", - " \"Values\": [ \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:ac \n", - " tion\", \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " } \n", - " ] \n", - " } \n", - " } \n", - " }, \n", - " \"AssociationType\": \"ContributedTo\" \n", - " } \n", - " ] \n", - " } \n", - " }, \n", - " { \n", - " \"Name\": \"EvaluateCustomModel\", \n", - " \"Type\": \"Training\", \n", - " \"Arguments\": { \n", - " \"RoleArn\": \"arn:aws:iam::052150106756:role/Admin\", \n", - " \"ModelPackageConfig\": { \n", - " \"ModelPackageGroupArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te \n", - " st-finetuned-models-gamma\", \n", - " \"SourceModelPackageArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin \n", - " etuned-models-gamma/28\" \n", - " }, \n", - " \"ServerlessJobConfig\": { \n", - " \"BaseModelArn\": \n", - " \"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/ \n", - " Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\", \n", - " \"AcceptEula\": true, \n", - " \"JobType\": \"Evaluation\", \n", - " \"EvaluationType\": \"CustomScorerEvaluation\", \n", - " \"EvaluatorArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKW \n", - " PZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-t \n", - " est/0.0.1\" \n", - " }, \n", - " \"StoppingCondition\": { \n", - " \"MaxRuntimeInSeconds\": 86400 \n", - " }, \n", - " \"HyperParameters\": { \n", - " \"task\": \"gen_qa\", \n", - " \"strategy\": \"gen_qa\", \n", - " \"evaluation_metric\": \"all\", \n", - " \"max_new_tokens\": \"8192\", \n", - " \"temperature\": \"0\", \n", - " \"top_k\": \"-1\", \n", - " \"top_p\": \"1.0\", \n", - " \"max_model_len\": \"12000\", \n", - " \"aggregation\": \"mean\", \n", - " \"postprocessing\": \"True\" \n", - " }, \n", - " \"OutputDataConfig\": { \n", - " \"S3OutputPath\": 
\n", - " \"s3://mufi-test-serverless-smtj/eval/\", \n", - " \"CompressionType\": \"NONE\" \n", - " }, \n", - " \"InputDataConfig\": [ \n", - " { \n", - " \"ChannelName\": \"train\", \n", - " \"DataSource\": { \n", - " \"S3DataSource\": { \n", - " \"S3DataType\": \"S3Prefix\", \n", - " \"S3Uri\": \n", - " \"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19 \n", - " 5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\" \n", - " } \n", - " } \n", - " } \n", - " ] \n", - " } \n", - " }, \n", - " { \n", - " \"Name\": \"AssociateLineage\", \n", - " \"Type\": \"Lineage\", \n", - " \"DependsOn\": [ \n", - " \"CreateEvaluationAction\" \n", - " ], \n", - " \"Arguments\": { \n", - " \"Artifacts\": [ \n", - " { \n", - " \"ArtifactName\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"-\", \n", - " \"Values\": [ \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"custom-eval-report\" \n", - " ] \n", - " } \n", - " }, \n", - " \"ArtifactType\": \"EvaluationReport\", \n", - " \"Source\": { \n", - " \"SourceUri\": { \n", - " \"Get\": \n", - " \"Steps.EvaluateCustomModel.OutputDataConfig.S3OutputPath\" \n", - " } \n", - " } \n", - " } \n", - " ], \n", - " \"Associations\": [ \n", - " { \n", - " \"Source\": { \n", - " \"Name\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"-\", \n", - " \"Values\": [ \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"custom-eval-report\" \n", - " ] \n", - " } \n", - " }, \n", - " \"Type\": \"Artifact\" \n", - " }, \n", - " \"Destination\": { \n", - " \"Arn\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"/\", \n", - " \"Values\": [ \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:ac \n", - " tion\", \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " } \n", - " ] \n", - " } \n", - " } \n", - " }, \n", - " \"AssociationType\": \"ContributedTo\" \n", - " } \n", - " ] \n", - " } \n", - " } \n", - " ] \n", - " } \n", - "\n" - ], - "text/plain": [ - 
"\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Rendered pipeline definition: \u001b]8;id=395506;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=123517;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#702\u001b\\\u001b[2m702\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Version\"\u001b[0m: \u001b[38;2;0;135;0m\"2020-12-01\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Metadata\"\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MlflowConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MlflowResourceArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m/mmlu-eval-experiment\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Parameters\"\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Lineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - 
"\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Actions\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ActionName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ActionType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceType\"\u001b[0m: \u001b[38;2;0;135;0m\"ModelPackage\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Properties\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"PipelineExecutionArn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionArn\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"PipelineName\"\u001b[0m: \u001b[2m \u001b[0m\n", - 
"\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SagemakerEvaluation-Deterministic\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Contexts\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ContextName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ContextType\"\u001b[0m: \u001b[38;2;0;135;0m\"PipelineExecution\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionArn\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Associations\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Action\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Context\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m3138877d772ec489bef\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluateCustomModel\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Training\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"RoleArn\"\u001b[0m: \u001b[38;2;0;135;0m\"arn:aws:iam::052150106756:role/Admin\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageGroupArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mst-finetuned-models-gamma\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceModelPackageArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ServerlessJobConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"BaseModelArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AcceptEula\"\u001b[0m: true, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"JobType\"\u001b[0m: 
\u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluationType\"\u001b[0m: \u001b[38;2;0;135;0m\"CustomScorerEvaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluatorArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKW\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-t\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mest/0.0.1\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"StoppingCondition\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MaxRuntimeInSeconds\"\u001b[0m: \u001b[1;36m86400\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"HyperParameters\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"task\"\u001b[0m: \u001b[38;2;0;135;0m\"gen_qa\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"strategy\"\u001b[0m: \u001b[38;2;0;135;0m\"gen_qa\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"evaluation_metric\"\u001b[0m: \u001b[38;2;0;135;0m\"all\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_new_tokens\"\u001b[0m: \u001b[38;2;0;135;0m\"8192\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"temperature\"\u001b[0m: \u001b[38;2;0;135;0m\"0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_k\"\u001b[0m: \u001b[38;2;0;135;0m\"-1\"\u001b[0m, 
\u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_p\"\u001b[0m: \u001b[38;2;0;135;0m\"1.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_model_len\"\u001b[0m: \u001b[38;2;0;135;0m\"12000\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"aggregation\"\u001b[0m: \u001b[38;2;0;135;0m\"mean\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"postprocessing\"\u001b[0m: \u001b[38;2;0;135;0m\"True\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"OutputDataConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3OutputPath\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CompressionType\"\u001b[0m: \u001b[38;2;0;135;0m\"NONE\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"InputDataConfig\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ChannelName\"\u001b[0m: \u001b[38;2;0;135;0m\"train\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataType\"\u001b[0m: \u001b[38;2;0;135;0m\"S3Prefix\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3Uri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"AssociateLineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Lineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DependsOn\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Artifacts\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactType\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluationReport\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateCustomModel.OutputDataConfig.S3OutputPath\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Associations\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m 
\u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Artifact\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - 
"\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO No existing pipeline found with prefix execution.py:212\n", - " SagemakerEvaluation-CustomScorerEvaluation, creating new one \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m No existing pipeline found with prefix \u001b]8;id=437465;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=501901;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#212\u001b\\\u001b[2m212\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-CustomScorerEvaluation, creating new one \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Creating new pipeline: execution.py:57\n", - " SagemakerEvaluation-CustomScorerEvaluation-1c2e4a67-ecb4-4c89-8e82-e82 \n", - " 3cbe579c3 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Creating new pipeline: \u001b]8;id=91501;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=923226;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#57\u001b\\\u001b[2m57\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-CustomScorerEvaluation-\u001b[93m1c2e4a67-ecb4-4c89-8e82-e82\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m3cbe579c3\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Creating pipeline resource. resources.py:30147\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Creating pipeline resource. \u001b]8;id=877192;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=410393;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py#30147\u001b\\\u001b[2m30147\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Successfully created pipeline: execution.py:76\n", - " SagemakerEvaluation-CustomScorerEvaluation-1c2e4a67-ecb4-4c89-8e82-e82 \n", - " 3cbe579c3 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Successfully created pipeline: \u001b]8;id=802515;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=256656;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#76\u001b\\\u001b[2m76\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-CustomScorerEvaluation-\u001b[93m1c2e4a67-ecb4-4c89-8e82-e82\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m3cbe579c3\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Waiting for pipeline execution.py:79\n", - " SagemakerEvaluation-CustomScorerEvaluation-1c2e4a67-ecb4-4c89-8e82-e82 \n", - " 3cbe579c3 to be ready... \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Waiting for pipeline \u001b]8;id=984002;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=40351;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#79\u001b\\\u001b[2m79\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-CustomScorerEvaluation-\u001b[93m1c2e4a67-ecb4-4c89-8e82-e82\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m3cbe579c3\u001b[0m to be ready\u001b[33m...\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
/Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/rich/live.py:231: UserWarning: \n",
- "install \"ipywidgets\" for Jupyter support\n",
- " warnings.warn('install \"ipywidgets\" for Jupyter support')\n",
- "\n"
- ],
- "text/plain": [
- "/Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/rich/live.py:231: UserWarning: \n",
- "install \"ipywidgets\" for Jupyter support\n",
- " warnings.warn('install \"ipywidgets\" for Jupyter support')\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "INFO Final Resource Status: Active resources.py:30410\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Final Resource Status: \u001b[1mActive\u001b[0m \u001b]8;id=750224;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=46929;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py#30410\u001b\\\u001b[2m30410\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n" - ], - "text/plain": [] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Pipeline execution.py:82\n", - " SagemakerEvaluation-CustomScorerEvaluation-1c2e4a67-ecb4-4c89-8e82-e82 \n", - " 3cbe579c3 is now active and ready for execution \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Pipeline \u001b]8;id=674167;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=265281;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#82\u001b\\\u001b[2m82\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-CustomScorerEvaluation-\u001b[93m1c2e4a67-ecb4-4c89-8e82-e82\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m3cbe579c3\u001b[0m is now active and ready for execution \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Starting pipeline execution: eval-meta-1b49b716-1764452564 execution.py:263\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Starting pipeline execution: eval-meta-1b49b716-\u001b[1;36m1764452564\u001b[0m \u001b]8;id=27465;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=541837;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#263\u001b\\\u001b[2m263\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 13:42:45] INFO Pipeline execution started: execution.py:274\n", - " arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \n", - " -CustomScorerEvaluation-1c2e4a67-ecb4-4c89-8e82-e823cbe579c3/executio \n", - " n/u2q2dl1w5aiq \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:42:45]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Pipeline execution started: \u001b]8;id=368377;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=144012;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#274\u001b\\\u001b[2m274\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -CustomScorerEvaluation-\u001b[93m1c2e4a67-ecb4-4c89-8e82-e823cbe579c3\u001b[0m/executio \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m n/u2q2dl1w5aiq \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "✓ Evaluation execution started successfully!\n", - " Execution Name: eval-meta-1b49b716\n", - " Pipeline Execution ARN: arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-CustomScorerEvaluation-1c2e4a67-ecb4-4c89-8e82-e823cbe579c3/execution/u2q2dl1w5aiq\n", - " Status: Executing\n" - ] - } - ], + "outputs": [], "source": [ "# Start evaluation\n", "execution = evaluator.evaluate()\n", @@ -1354,72 +198,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Current Status: Executing\n" - ] - }, - { - "data": { - "text/html": [ - "
PipelineExecutionStatus(\n", - "│ overall_status='Executing',\n", - "│ step_details=[\n", - "│ │ StepDetail(\n", - "│ │ │ name='EvaluateCustomModel',\n", - "│ │ │ status='Executing',\n", - "│ │ │ start_time='2025-11-29T13:42:45.523000-08:00',\n", - "│ │ │ end_time='<sagemaker.core.utils.utils.Unassigned object at 0x120ab8f80>',\n", - "│ │ │ display_name=None,\n", - "│ │ │ failure_reason=None\n", - "│ │ ),\n", - "│ │ StepDetail(\n", - "│ │ │ name='CreateEvaluationAction',\n", - "│ │ │ status='Succeeded',\n", - "│ │ │ start_time='2025-11-29T13:42:45.523000-08:00',\n", - "│ │ │ end_time='2025-11-29T13:42:48.017000-08:00',\n", - "│ │ │ display_name=None,\n", - "│ │ │ failure_reason=None\n", - "│ │ )\n", - "│ ],\n", - "│ failure_reason=None\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'EvaluateCustomModel'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T13:42:45.523000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'\u001b[0m\u001b[1;38;2;0;135;0m<\u001b[0m\u001b[1;38;2;0;135;0msagemaker.core.utils.utils.Unassigned\u001b[0m\u001b[38;2;0;135;0m object at 0x120ab8f80\u001b[0m\u001b[1;38;2;0;135;0m>\u001b[0m\u001b[38;2;0;135;0m'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - 
"\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'CreateEvaluationAction'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T13:42:45.523000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T13:42:48.017000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Check current status\n", "execution.refresh()\n", @@ -1439,64 +220,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
╭─────────────────────────────────────────── Pipeline Execution Status ───────────────────────────────────────────╮\n", - "│ Overall Status Succeeded │\n", - "│ Target Status Succeeded │\n", - "│ Elapsed Time 0.9s │\n", - "│ │\n", - "│ Pipeline Steps │\n", - "│ Step Name Status Duration │\n", - "│ AssociateLineage Succeeded 1.9s │\n", - "│ EvaluateCustomModel Succeeded 7462.5s │\n", - "│ CreateEvaluationAction Succeeded 2.5s │\n", - "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001b[34m╭─\u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34mPipeline Execution Status\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mOverall Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;37mSucceeded\u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mTarget Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;37mSucceeded\u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mElapsed Time \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[37m0.9s \u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35mPipeline Steps\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mStep Name \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35mStatus \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35mDuration \u001b[0m\u001b[1;35m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mAssociateLineage \u001b[0m\u001b[36m \u001b[0m\u001b[33m 
\u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m1.9s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mEvaluateCustomModel \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m7462.5s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mCreateEvaluationAction \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m2.5s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:21:36] INFO Final Resource Status: Succeeded execution.py:979\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:21:36]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Final Resource Status: Succeeded \u001b]8;id=693225;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=873243;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#979\u001b\\\u001b[2m979\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Final Status: Succeeded\n" - ] - } - ], + "outputs": [], "source": [ "# Wait for job to complete (with rich visual feedback)\n", "execution.wait(poll=30, timeout=3600)\n", @@ -1506,194 +232,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 16:21:42] INFO S3 bucket: mufi-test-serverless-smtj, prefix: eval show_results_utils.py:130\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:21:42]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m S3 bucket: mufi-test-serverless-smtj, prefix: eval \u001b]8;id=425698;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=639097;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#130\u001b\\\u001b[2m130\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Extracted training job name: show_results_utils.py:63\n", - " pipelines-u2q2dl1w5aiq-EvaluateCustomModel-FNSg2Knqlf from \n", - " step: EvaluateCustomModel \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted training job name: \u001b]8;id=993672;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=652226;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#63\u001b\\\u001b[2m63\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-u2q2dl1w5aiq-EvaluateCustomModel-FNSg2Knqlf from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m step: EvaluateCustomModel \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Searching for results_*.json in show_results_utils.py:150\n", - " s3://mufi-test-serverless-smtj/eval/pipelines-u2q2dl1w5aiq-E \n", - " valuateCustomModel-FNSg2Knqlf/output/output/ \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for results_*.json in \u001b]8;id=724854;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=324888;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/pipelines-u2q2dl1w5aiq-E\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mvaluateCustomModel-FNSg2Knqlf/output/output/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Found results file: show_results_utils.py:168\n", - " eval/pipelines-u2q2dl1w5aiq-EvaluateCustomModel-FNSg2Knqlf/o \n", - " utput/output/eval-meta_textgeneration_llama_3_2_1b_instruct- \n", - " -or8pa/eval_results/results_2025-11-29T23-46-45.108093+00-00 \n", - " .json \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found results file: \u001b]8;id=770358;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=338226;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#168\u001b\\\u001b[2m168\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m eval/pipelines-u2q2dl1w5aiq-EvaluateCustomModel-FNSg2Knqlf/o \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m utput/output/eval-meta_textgeneration_llama_3_2_1b_instruct- \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -or8pa/eval_results/results_2025-\u001b[1;36m11\u001b[0m-29T23-\u001b[1;36m46\u001b[0m-\u001b[1;36m45.108093\u001b[0m+\u001b[1;36m00-00\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1;36m.j\u001b[0mson \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:21:43] INFO Using metrics from key: 'custom|gen_qa_gen_qa|0' (gen_qa or show_results_utils.py:100\n", - " custom_scorer format) \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:21:43]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using metrics from key: \u001b[38;2;0;135;0m'custom|gen_qa_gen_qa|0'\u001b[0m \u001b[1m(\u001b[0mgen_qa or \u001b]8;id=904034;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=137242;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#100\u001b\\\u001b[2m100\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m custom_scorer format\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Custom Model Results \n", - "╭────────────────────────────────┬─────────────────╮\n", - "│ Metric │ Value │\n", - "├────────────────────────────────┼─────────────────┤\n", - "│ bleu │ 6.6928 │\n", - "│ bleu_stderr │ 0.7769 │\n", - "│ byoc_failure_count │ 3572.0000 │\n", - "│ em │ 1.26% │\n", - "│ em_stderr │ 0.0019 │\n", - "│ f1 │ 19.13% │\n", - "│ f1_score_quasi │ 25.29% │\n", - "│ f1_score_quasi_stderr │ 0.0049 │\n", - "│ f1_stderr │ 0.0047 │\n", - "│ qem │ 2.21% │\n", - "│ qem_stderr │ 0.0025 │\n", - "│ rouge1 │ 25.73% │\n", - "│ rouge1_stderr │ 0.0047 │\n", - "│ rouge2 │ 19.15% │\n", - "│ rouge2_stderr │ 0.0047 │\n", - "│ rougeL │ 25.04% │\n", - "│ rougeL_stderr │ 0.0047 │\n", - "╰────────────────────────────────┴─────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3;32mCustom Model Results\u001b[0m\u001b[3m \u001b[0m\n", - "╭────────────────────────────────┬─────────────────╮\n", - "│\u001b[1;32m \u001b[0m\u001b[1;32mMetric \u001b[0m\u001b[1;32m \u001b[0m│\u001b[1;32m \u001b[0m\u001b[1;32m Value\u001b[0m\u001b[1;32m \u001b[0m│\n", - "├────────────────────────────────┼─────────────────┤\n", - "│\u001b[36m \u001b[0m\u001b[36mbleu \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 6.6928\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mbleu_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.7769\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mbyoc_failure_count \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 3572.0000\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mem \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 1.26%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mem_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0019\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 19.13%\u001b[0m\u001b[37m 
\u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_score_quasi \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.29%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_score_quasi_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0049\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mqem \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 2.21%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mqem_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0025\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge1 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.73%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge1_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge2 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 19.15%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge2_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrougeL \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.04%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrougeL_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "╰────────────────────────────────┴─────────────────╯\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
╭─────────────────────────────────────────── Result Artifacts Location ───────────────────────────────────────────╮\n", - "│ │\n", - "│ │\n", - "│ 📦 Full evaluation artifacts available at: │\n", - "│ │\n", - "│ Custom Model: │\n", - "│ s3://mufi-test-serverless-smtj/eval/pipelines-u2q2dl1w5aiq-EvaluateCustomModel-FNSg2Knqlf/output/output/Non │\n", - "│ e/eval_results/ │\n", - "│ │\n", - "│ │\n", - "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001b[34m╭─\u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34mResult Artifacts Location\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m📦 \u001b[0m\u001b[1mFull evaluation artifacts available at:\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;32mCustom Model:\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m s3://mufi-test-serverless-smtj/eval/pipelines-u2q2dl1w5aiq-EvaluateCustomModel-FNSg2Knqlf/output/output/Non\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36me/eval_results/\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# show results\n", "execution.show_results()" @@ -1710,25 +251,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - 
"text": [ - "INFO - sagemaker.modules.evaluate.execution - Extracted s3_output_path from training job pipelines-amlk8q2ukw8x-EvaluateCustomModel-VElzvyVY19: s3://mufi-test-serverless-smtj/eval/\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Retrieved job: amlk8q2ukw8x\n", - "Status: Succeeded\n" - ] - } - ], + "outputs": [], "source": [ "from sagemaker.train.evaluate import EvaluationPipelineExecution\n", "\n", @@ -1752,18 +277,9 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found 0 custom scorer evaluation(s):\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "# Get all custom scorer evaluations\n", "all_executions = list(CustomScorerEvaluator.get_all())\n", diff --git a/sagemaker-train/example_notebooks/evaluate/llm_as_judge_demo.ipynb b/sagemaker-train/example_notebooks/evaluate/llm_as_judge_demo.ipynb index 8ba50c3ae7..ab76f46a9c 100644 --- a/sagemaker-train/example_notebooks/evaluate/llm_as_judge_demo.ipynb +++ b/sagemaker-train/example_notebooks/evaluate/llm_as_judge_demo.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -90,7 +90,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -127,93 +127,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:43:52] INFO Found credentials in shared credentials file: ~/.aws/credentials credentials.py:1364\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:43:52]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=406523;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=534480;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/botocore/credentials.py#1364\u001b\\\u001b[2m1364\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /Users/mufi/Library/Application Support/sagemaker/config.yaml\n" - ] - }, - { - "data": { - "text/html": [ - "
INFO Resolved MLflow resource ARN: base_evaluator.py:113\n", - " arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/ \n", - " mmlu-eval-experiment \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved MLflow resource ARN: \u001b]8;id=360312;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=805617;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#113\u001b\\\u001b[2m113\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/ \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m mmlu-eval-experiment \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
LLMAsJudgeEvaluator(\n", - "│ region=None,\n", - "│ sagemaker_session=<sagemaker.core.helper.session_helper.Session object at 0x15f5c11c0>,\n", - "│ model='arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28',\n", - "│ base_eval_name='eval-meta-04295d90',\n", - "│ s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n", - "│ mlflow_resource_arn='arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment',\n", - "│ mlflow_experiment_name=None,\n", - "│ mlflow_run_name=None,\n", - "│ networking=None,\n", - "│ kms_key_id=None,\n", - "│ model_package_group=None,\n", - "│ evaluator_model='anthropic.claude-3-5-haiku-20241022-v1:0',\n", - "│ dataset='s3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-dataset/gen_qa.jsonl',\n", - "│ builtin_metrics=['Completeness', 'Faithfulness'],\n", - "│ custom_metrics='[{\"customMetricDefinition\": {\"name\": \"PositiveSentiment\", \"instructions\": \"You are an expert evaluator. Your task is to assess if the sentiment of the response is positive. 
Rate the response based on whether it conveys positive sentiment, helpfulness, and constructive tone.\\\\n\\\\nConsider the following:\\\\n- Does the response have a positive, encouraging tone?\\\\n- Is the response helpful and constructive?\\\\n- Does it avoid negative language or criticism?\\\\n\\\\nRate on this scale:\\\\n- Good: Response has positive sentiment\\\\n- Poor: Response lacks positive sentiment\\\\n\\\\nHere is the actual task:\\\\nPrompt: {{prompt}}\\\\nResponse: {{prediction}}\", \"ratingScale\": [{\"definition\": \"Good\", \"value\": {\"floatValue\": 1}}, {\"definition\": \"Poor\", \"value\": {\"floatValue\": 0}}]}}]',\n", - "│ evaluate_base_model=False\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mLLMAsJudgeEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mregion\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msagemaker_session\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225msagemaker.core.helper.session_helper.Session\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x15f5c11c0\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbase_eval_name\u001b[0m=\u001b[38;2;0;135;0m'eval-meta-04295d90'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m=\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_resource_arn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_experiment_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ 
\u001b[0m\u001b[38;2;215;175;0mmlflow_run_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mnetworking\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mkms_key_id\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel_package_group\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mevaluator_model\u001b[0m=\u001b[38;2;0;135;0m'anthropic.claude-3-5-haiku-20241022-v1:0'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mdataset\u001b[0m=\u001b[38;2;0;135;0m's3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-dataset/gen_qa.jsonl'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbuiltin_metrics\u001b[0m=\u001b[1m[\u001b[0m\u001b[38;2;0;135;0m'Completeness'\u001b[0m, \u001b[38;2;0;135;0m'Faithfulness'\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mcustom_metrics\u001b[0m=\u001b[38;2;0;135;0m'\u001b[0m\u001b[1;38;2;0;135;0m[\u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0m\"customMetricDefinition\": \u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0m\"name\": \"PositiveSentiment\", \"instructions\": \"You are an expert evaluator. Your task is to assess if the sentiment of the response is positive. 
Rate the response based on whether it conveys positive sentiment, helpfulness, and constructive tone.\\\\n\\\\nConsider the following:\\\\n- Does the response have a positive, encouraging tone?\\\\n- Is the response helpful and constructive?\\\\n- Does it avoid negative language or criticism?\\\\n\\\\nRate on this scale:\\\\n- Good: Response has positive sentiment\\\\n- Poor: Response lacks positive sentiment\\\\n\\\\nHere is the actual task:\\\\nPrompt: \u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0mprompt\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[38;2;0;135;0m\\\\nResponse: \u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0mprediction\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[38;2;0;135;0m\", \"ratingScale\": \u001b[0m\u001b[1;38;2;0;135;0m[\u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0m\"definition\": \"Good\", \"value\": \u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0m\"floatValue\": 1\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[38;2;0;135;0m, \u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0m\"definition\": \"Poor\", \"value\": \u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0m\"floatValue\": 0\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m]\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m]\u001b[0m\u001b[38;2;0;135;0m'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mevaluate_base_model\u001b[0m=\u001b[3;38;2;215;0;0mFalse\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "\n", "# Create evaluator with custom metrics\n", @@ -242,7 +156,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": 
{}, "outputs": [], "source": [ @@ -303,7 +217,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -342,1122 +256,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 16:22:01] INFO SageMaker Python SDK will collect telemetry to help us better telemetry_logging.py:91\n", - " understand our user's needs, diagnose issues, and deliver \n", - " additional features. \n", - " To opt out of telemetry, please disable via TelemetryOptOut \n", - " parameter in SDK defaults config. For more information, refer \n", - " to \n", - " https://sagemaker.readthedocs.io/en/stable/overview.html#confi \n", - " guring-and-using-defaults-with-the-sagemaker-python-sdk. \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:22:01]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=931878;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=760856;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Getting or creating artifact for source: base_evaluator.py:597\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \n", - " tuned-models-gamma/28 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Getting or creating artifact for source: \u001b]8;id=179503;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=71430;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#597\u001b\\\u001b[2m597\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Searching for existing artifact for model package: base_evaluator.py:459\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \n", - " tuned-models-gamma/28 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for existing artifact for model package: \u001b]8;id=2444;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=787547;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#459\u001b\\\u001b[2m459\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Found existing artifact: base_evaluator.py:468\n", - " arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3 \n", - " 138877d772ec489bef \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found existing artifact: \u001b]8;id=808361;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=665812;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#468\u001b\\\u001b[2m468\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3 \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 138877d772ec489bef \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Inferred model package group ARN: base_evaluator.py:386\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes \n", - " t-finetuned-models-gamma from \n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \n", - " tuned-models-gamma/28 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Inferred model package group ARN: \u001b]8;id=361400;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=518747;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#386\u001b\\\u001b[2m386\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m t-finetuned-models-gamma from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Automatically inferred model_package_group: base_evaluator.py:421\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes \n", - " t-finetuned-models-gamma \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Automatically inferred model_package_group: \u001b]8;id=299761;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=867866;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#421\u001b\\\u001b[2m421\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m t-finetuned-models-gamma \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Using ModelPackage - model_package_group_arn: llm_as_judge_evaluator.py:319\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package-g \n", - " roup/test-finetuned-models-gamma \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using ModelPackage - model_package_group_arn: \u001b]8;id=538256;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py\u001b\\\u001b[2mllm_as_judge_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=292230;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py#319\u001b\\\u001b[2m319\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-g \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m roup/test-finetuned-models-gamma \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Resolved model info - base_model_name: llm_as_judge_evaluator.py:322\n", - " meta-textgeneration-llama-3-2-1b-instruct, \n", - " base_model_arn: \n", - " arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPub \n", - " licHub/Model/meta-textgeneration-llama-3-2-1b-instruct/1 \n", - " .10.0, source_model_package_arn: \n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package/t \n", - " est-finetuned-models-gamma/28 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved model info - base_model_name: \u001b]8;id=854970;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py\u001b\\\u001b[2mllm_as_judge_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=553794;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py#322\u001b\\\u001b[2m322\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m base_model_arn: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPub \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m licHub/Model/meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct/\u001b[1;36m1\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1;36m.10\u001b[0m.\u001b[1;36m0\u001b[0m, source_model_package_arn: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/t \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m est-finetuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Uploading custom metrics to S3: llm_as_judge_evaluator.py:220\n", - " s3://mufi-test-serverless-smtj/eval/evaluationinputs/eva \n", - " l-meta-04295d9020251130-002201/custom-metrics.json \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Uploading custom metrics to S3: \u001b]8;id=657021;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py\u001b\\\u001b[2mllm_as_judge_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=5404;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py#220\u001b\\\u001b[2m220\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/evaluationinputs/eva\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225ml-meta-04295d9020251130-002201/\u001b[0m\u001b[38;2;225;0;225mcustom-metrics.json\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Successfully uploaded custom metrics to: llm_as_judge_evaluator.py:228\n", - " s3://mufi-test-serverless-smtj/eval/evaluationinputs/eva \n", - " l-meta-04295d9020251130-002201/custom-metrics.json \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Successfully uploaded custom metrics to: \u001b]8;id=718083;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py\u001b\\\u001b[2mllm_as_judge_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=581773;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py#228\u001b\\\u001b[2m228\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/evaluationinputs/eva\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225ml-meta-04295d9020251130-002201/\u001b[0m\u001b[38;2;225;0;225mcustom-metrics.json\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Using full template for ModelPackage base_evaluator.py:655\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using full template for ModelPackage \u001b]8;id=143249;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=489338;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#655\u001b\\\u001b[2m655\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Resolved template parameters: {'role_arn': base_evaluator.py:693\n", - " 'arn:aws:iam::052150106756:role/Admin', 'mlflow_resource_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server \n", - " /mmlu-eval-experiment', 'mlflow_experiment_name': None, \n", - " 'mlflow_run_name': None, 'model_package_group_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te \n", - " st-finetuned-models-gamma', 'source_model_package_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin \n", - " etuned-models-gamma/28', 'base_model_arn': \n", - " 'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/ \n", - " Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0', \n", - " 's3_output_path': 's3://mufi-test-serverless-smtj/eval', \n", - " 'dataset_artifact_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b \n", - " 3138877d772ec489bef', 'action_arn_prefix': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:action', \n", - " 'dataset_uri': \n", - " 's3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-datas \n", - " et/gen_qa.jsonl', 'judge_model_id': \n", - " 'anthropic.claude-3-5-haiku-20241022-v1:0', 'llmaj_metrics': \n", - " '[\"Completeness\", \"Faithfulness\"]', 'custom_metrics_s3_path': \n", - " 's3://mufi-test-serverless-smtj/eval/evaluationinputs/eval-meta- \n", - " 04295d9020251130-002201/custom-metrics.json', 'max_new_tokens': \n", - " '8192', 'temperature': '0', 'top_k': '-1', 'top_p': '1.0', \n", - " 'pipeline_name': 'SagemakerModelEvaluationType2-llmaj', \n", - " 'evaluate_base_model': False} \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved template parameters: \u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'role_arn'\u001b[0m: 
\u001b]8;id=109479;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=566018;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#693\u001b\\\u001b[2m693\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:iam::052150106756:role/Admin'\u001b[0m, \u001b[38;2;0;135;0m'mlflow_resource_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m/mmlu-eval-experiment'\u001b[0m, \u001b[38;2;0;135;0m'mlflow_experiment_name'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'mlflow_run_name'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m, \u001b[38;2;0;135;0m'model_package_group_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mst-finetuned-models-gamma'\u001b[0m, \u001b[38;2;0;135;0m'source_model_package_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28'\u001b[0m, \u001b[38;2;0;135;0m'base_model_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0'\u001b[0m, \u001b[2m \u001b[0m\n", - 
"\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3_output_path'\u001b[0m: \u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'dataset_artifact_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m3138877d772ec489bef'\u001b[0m, \u001b[38;2;0;135;0m'action_arn_prefix'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:action'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'dataset_uri'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-datas\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0met/gen_qa.jsonl'\u001b[0m, \u001b[38;2;0;135;0m'judge_model_id'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'anthropic.claude-3-5-haiku-20241022-v1:0'\u001b[0m, \u001b[38;2;0;135;0m'llmaj_metrics'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'\u001b[0m\u001b[1;38;2;0;135;0m[\u001b[0m\u001b[38;2;0;135;0m\"Completeness\", \"Faithfulness\"\u001b[0m\u001b[1;38;2;0;135;0m]\u001b[0m\u001b[38;2;0;135;0m'\u001b[0m, \u001b[38;2;0;135;0m'custom_metrics_s3_path'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/evaluationinputs/eval-meta-\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m04295d9020251130-002201/custom-metrics.json'\u001b[0m, \u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'8192'\u001b[0m, \u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m, 
\u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[38;2;0;135;0m'-1'\u001b[0m, \u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[38;2;0;135;0m'1.0'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'pipeline_name'\u001b[0m: \u001b[38;2;0;135;0m'SagemakerModelEvaluationType2-llmaj'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'evaluate_base_model'\u001b[0m: \u001b[3;38;2;215;0;0mFalse\u001b[0m\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Rendered pipeline definition: base_evaluator.py:702\n", - " { \n", - " \"Version\": \"2020-12-01\", \n", - " \"Metadata\": {}, \n", - " \"MlflowConfig\": { \n", - " \"MlflowResourceArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server \n", - " /mmlu-eval-experiment\" \n", - " }, \n", - " \"Parameters\": [], \n", - " \"Steps\": [ \n", - " { \n", - " \"Name\": \"CreateEvaluationAction\", \n", - " \"Type\": \"Lineage\", \n", - " \"Arguments\": { \n", - " \"Actions\": [ \n", - " { \n", - " \"ActionName\": { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"ActionType\": \"Evaluation\", \n", - " \"Source\": { \n", - " \"SourceUri\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin \n", - " etuned-models-gamma/28\", \n", - " \"SourceType\": \"ModelPackage\" \n", - " }, \n", - " \"Properties\": { \n", - " \"PipelineExecutionArn\": { \n", - " \"Get\": \"Execution.PipelineExecutionArn\" \n", - " }, \n", - " \"PipelineName\": \n", - " \"SagemakerModelEvaluationType2-llmaj\" \n", - " } \n", - " } \n", - " ], \n", - " \"Contexts\": [ \n", - " { \n", - " \"ContextName\": { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"ContextType\": \"PipelineExecution\", \n", - " \"Source\": { \n", - " \"SourceUri\": { \n", - " \"Get\": \"Execution.PipelineExecutionArn\" \n", - " } \n", - " } \n", - " } \n", - " ], \n", - " \"Associations\": [ \n", - " { \n", - " \"Source\": { \n", - " \"Name\": { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"Type\": \"Action\" \n", - " }, \n", - " \"Destination\": { \n", - " \"Name\": { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"Type\": \"Context\" \n", - " }, \n", - " \"AssociationType\": \"ContributedTo\" \n", - " }, \n", - " { \n", - " \"Source\": { \n", - " \"Arn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b \n", - " 3138877d772ec489bef\" \n", - " }, 
\n", - " \"Destination\": { \n", - " \"Arn\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"/\", \n", - " \"Values\": [ \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:ac \n", - " tion\", \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " } \n", - " ] \n", - " } \n", - " } \n", - " }, \n", - " \"AssociationType\": \"ContributedTo\" \n", - " } \n", - " ] \n", - " } \n", - " }, \n", - " { \n", - " \"Name\": \"EvaluateCustomInferenceModel\", \n", - " \"Type\": \"Training\", \n", - " \"Arguments\": { \n", - " \"TrainingJobName\": \"CustomInference\", \n", - " \"RoleArn\": \"arn:aws:iam::052150106756:role/Admin\", \n", - " \"ServerlessJobConfig\": { \n", - " \"BaseModelArn\": \n", - " \"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/ \n", - " Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\", \n", - " \"AcceptEula\": true, \n", - " \"JobType\": \"Evaluation\", \n", - " \"EvaluationType\": \"BenchmarkEvaluation\" \n", - " }, \n", - " \"StoppingCondition\": { \n", - " \"MaxRuntimeInSeconds\": 86400 \n", - " }, \n", - " \"HyperParameters\": { \n", - " \"name\": \"CustomInference\", \n", - " \"task\": \"inference_only\" \n", - " }, \n", - " \"OutputDataConfig\": { \n", - " \"S3OutputPath\": \"s3://mufi-test-serverless-smtj/eval\", \n", - " \"CompressionType\": \"NONE\" \n", - " }, \n", - " \"ModelPackageConfig\": { \n", - " \"ModelPackageGroupArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te \n", - " st-finetuned-models-gamma\", \n", - " \"SourceModelPackageArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin \n", - " etuned-models-gamma/28\" \n", - " }, \n", - " \"InputDataConfig\": [ \n", - " { \n", - " \"ChannelName\": \"train\", \n", - " \"DataSource\": { \n", - " \"S3DataSource\": { \n", - " \"S3DataType\": \"S3Prefix\", \n", - " \"S3Uri\": \n", - " \"s3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-datas \n", - " et/gen_qa.jsonl\" \n", - " } \n", - " } 
\n", - " } \n", - " ] \n", - " }, \n", - " \"DependsOn\": [ \n", - " \"CreateEvaluationAction\" \n", - " ] \n", - " }, \n", - " { \n", - " \"Name\": \"EvaluateCustomModelMetrics\", \n", - " \"Type\": \"Training\", \n", - " \"DependsOn\": [ \n", - " \"EvaluateCustomInferenceModel\" \n", - " ], \n", - " \"Arguments\": { \n", - " \"TrainingJobName\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"-\", \n", - " \"Values\": [ \n", - " \"custom-llmaj-eval\", \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " } \n", - " ] \n", - " } \n", - " }, \n", - " \"RoleArn\": \"arn:aws:iam::052150106756:role/Admin\", \n", - " \"ServerlessJobConfig\": { \n", - " \"BaseModelArn\": \n", - " \"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/ \n", - " Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\", \n", - " \"AcceptEula\": true, \n", - " \"JobType\": \"Evaluation\", \n", - " \"EvaluationType\": \"LLMAJEvaluation\" \n", - " }, \n", - " \"StoppingCondition\": { \n", - " \"MaxRuntimeInSeconds\": 86400 \n", - " }, \n", - " \"HyperParameters\": { \n", - " \"name\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"-\", \n", - " \"Values\": [ \n", - " \"custom-llmaj-eval\", \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " } \n", - " ] \n", - " } \n", - " }, \n", - " \"judge_model_id\": \n", - " \"anthropic.claude-3-5-haiku-20241022-v1:0\", \n", - " \"inference_data_s3_path\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"\", \n", - " \"Values\": [ \n", - " { \n", - " \"Get\": \n", - " \"Steps.EvaluateCustomInferenceModel.OutputDataConfig.S3OutputPat \n", - " h\" \n", - " }, \n", - " \"/\", \n", - " { \n", - " \"Get\": \n", - " \"Steps.EvaluateCustomInferenceModel.TrainingJobName\" \n", - " }, \n", - " \"/output/output/\", \n", - " \"CustomInference\", \n", - " \"/eval_results/inference_output.jsonl\" \n", - " ] \n", - " } \n", - " }, \n", - " \"output_path\": \"s3://mufi-test-serverless-smtj/eval\", \n", - " \"llmaj_metrics\": 
\"[\\\"Completeness\\\", \n", - " \\\"Faithfulness\\\"]\", \n", - " \"custom_metrics_s3_path\": \n", - " \"s3://mufi-test-serverless-smtj/eval/evaluationinputs/eval-meta- \n", - " 04295d9020251130-002201/custom-metrics.json\", \n", - " \"max_new_tokens\": \"8192\", \n", - " \"temperature\": \"0\", \n", - " \"top_k\": \"-1\", \n", - " \"top_p\": \"1.0\" \n", - " }, \n", - " \"OutputDataConfig\": { \n", - " \"S3OutputPath\": \"s3://mufi-test-serverless-smtj/eval\", \n", - " \"CompressionType\": \"NONE\" \n", - " }, \n", - " \"ModelPackageConfig\": { \n", - " \"ModelPackageGroupArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te \n", - " st-finetuned-models-gamma\", \n", - " \"SourceModelPackageArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin \n", - " etuned-models-gamma/28\" \n", - " } \n", - " } \n", - " }, \n", - " { \n", - " \"Name\": \"AssociateLineage\", \n", - " \"Type\": \"Lineage\", \n", - " \"DependsOn\": [ \n", - " \"CreateEvaluationAction\" \n", - " ], \n", - " \"Arguments\": { \n", - " \"Artifacts\": [ \n", - " { \n", - " \"ArtifactName\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"-\", \n", - " \"Values\": [ \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"custom-inference-results\" \n", - " ] \n", - " } \n", - " }, \n", - " \"ArtifactType\": \"InferenceResults\", \n", - " \"Source\": { \n", - " \"SourceUri\": { \n", - " \"Get\": \n", - " \"Steps.EvaluateCustomInferenceModel.OutputDataConfig.S3OutputPat \n", - " h\" \n", - " } \n", - " } \n", - " }, \n", - " { \n", - " \"ArtifactName\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"-\", \n", - " \"Values\": [ \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"custom-eval-report\" \n", - " ] \n", - " } \n", - " }, \n", - " \"ArtifactType\": \"EvaluationReport\", \n", - " \"Source\": { \n", - " \"SourceUri\": { \n", - " \"Get\": \n", - " 
\"Steps.EvaluateCustomModelMetrics.OutputDataConfig.S3OutputPath\" \n", - " } \n", - " } \n", - " } \n", - " ], \n", - " \"Associations\": [ \n", - " { \n", - " \"Source\": { \n", - " \"Name\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"-\", \n", - " \"Values\": [ \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"custom-inference-results\" \n", - " ] \n", - " } \n", - " }, \n", - " \"Type\": \"Artifact\" \n", - " }, \n", - " \"Destination\": { \n", - " \"Arn\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"/\", \n", - " \"Values\": [ \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:ac \n", - " tion\", \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " } \n", - " ] \n", - " } \n", - " } \n", - " }, \n", - " \"AssociationType\": \"ContributedTo\" \n", - " }, \n", - " { \n", - " \"Source\": { \n", - " \"Name\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"-\", \n", - " \"Values\": [ \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"custom-eval-report\" \n", - " ] \n", - " } \n", - " }, \n", - " \"Type\": \"Artifact\" \n", - " }, \n", - " \"Destination\": { \n", - " \"Arn\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"/\", \n", - " \"Values\": [ \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:ac \n", - " tion\", \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " } \n", - " ] \n", - " } \n", - " } \n", - " }, \n", - " \"AssociationType\": \"ContributedTo\" \n", - " } \n", - " ] \n", - " } \n", - " } \n", - " ] \n", - " } \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Rendered pipeline definition: 
\u001b]8;id=358999;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=565177;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#702\u001b\\\u001b[2m702\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Version\"\u001b[0m: \u001b[38;2;0;135;0m\"2020-12-01\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Metadata\"\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MlflowConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MlflowResourceArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m/mmlu-eval-experiment\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Parameters\"\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Lineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - 
"\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Actions\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ActionName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ActionType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceType\"\u001b[0m: \u001b[38;2;0;135;0m\"ModelPackage\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Properties\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"PipelineExecutionArn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionArn\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"PipelineName\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SagemakerModelEvaluationType2-llmaj\"\u001b[0m \u001b[2m 
\u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Contexts\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ContextName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ContextType\"\u001b[0m: \u001b[38;2;0;135;0m\"PipelineExecution\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionArn\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Associations\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Action\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Context\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m3138877d772ec489bef\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluateCustomInferenceModel\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Training\"\u001b[0m, \u001b[2m \u001b[0m\n", - 
"\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"TrainingJobName\"\u001b[0m: \u001b[38;2;0;135;0m\"CustomInference\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"RoleArn\"\u001b[0m: \u001b[38;2;0;135;0m\"arn:aws:iam::052150106756:role/Admin\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ServerlessJobConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"BaseModelArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AcceptEula\"\u001b[0m: true, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"JobType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluationType\"\u001b[0m: \u001b[38;2;0;135;0m\"BenchmarkEvaluation\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"StoppingCondition\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MaxRuntimeInSeconds\"\u001b[0m: \u001b[1;36m86400\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"HyperParameters\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"name\"\u001b[0m: \u001b[38;2;0;135;0m\"CustomInference\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"task\"\u001b[0m: \u001b[38;2;0;135;0m\"inference_only\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"OutputDataConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3OutputPath\"\u001b[0m: \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CompressionType\"\u001b[0m: \u001b[38;2;0;135;0m\"NONE\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageGroupArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mst-finetuned-models-gamma\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceModelPackageArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"InputDataConfig\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ChannelName\"\u001b[0m: \u001b[38;2;0;135;0m\"train\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DataSource\"\u001b[0m: 
\u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataType\"\u001b[0m: \u001b[38;2;0;135;0m\"S3Prefix\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3Uri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-datas\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0met/gen_qa.jsonl\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DependsOn\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluateCustomModelMetrics\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Training\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DependsOn\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluateCustomInferenceModel\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - 
"\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"TrainingJobName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-llmaj-eval\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"RoleArn\"\u001b[0m: \u001b[38;2;0;135;0m\"arn:aws:iam::052150106756:role/Admin\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ServerlessJobConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"BaseModelArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AcceptEula\"\u001b[0m: true, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"JobType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluationType\"\u001b[0m: \u001b[38;2;0;135;0m\"LLMAJEvaluation\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"StoppingCondition\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MaxRuntimeInSeconds\"\u001b[0m: \u001b[1;36m86400\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"HyperParameters\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-llmaj-eval\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"judge_model_id\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"anthropic.claude-3-5-haiku-20241022-v1:0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"inference_data_s3_path\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateCustomInferenceModel.OutputDataConfig.S3OutputPat\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mh\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateCustomInferenceModel.TrainingJobName\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"/output/output/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CustomInference\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"/eval_results/inference_output.jsonl\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m 
\u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"output_path\"\u001b[0m: \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"llmaj_metrics\"\u001b[0m: \u001b[38;2;0;135;0m\"\u001b[0m\u001b[1;38;2;0;135;0m[\u001b[0m\u001b[38;2;0;135;0m\\\"Completeness\\\", \u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\\\"Faithfulness\\\"\u001b[0m\u001b[1;38;2;0;135;0m]\u001b[0m\u001b[38;2;0;135;0m\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom_metrics_s3_path\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval/evaluationinputs/eval-meta-\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m04295d9020251130-002201/custom-metrics.json\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_new_tokens\"\u001b[0m: \u001b[38;2;0;135;0m\"8192\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"temperature\"\u001b[0m: \u001b[38;2;0;135;0m\"0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_k\"\u001b[0m: \u001b[38;2;0;135;0m\"-1\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_p\"\u001b[0m: \u001b[38;2;0;135;0m\"1.0\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"OutputDataConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3OutputPath\"\u001b[0m: \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CompressionType\"\u001b[0m: \u001b[38;2;0;135;0m\"NONE\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageGroupArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mst-finetuned-models-gamma\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceModelPackageArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"AssociateLineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Lineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DependsOn\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Artifacts\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m 
\u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-inference-results\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactType\"\u001b[0m: \u001b[38;2;0;135;0m\"InferenceResults\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateCustomInferenceModel.OutputDataConfig.S3OutputPat\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mh\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", 
- "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactType\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluationReport\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateCustomModelMetrics.OutputDataConfig.S3OutputPath\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Associations\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-inference-results\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Artifact\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: 
\u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Artifact\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:22:02] INFO Found existing pipeline: execution.py:199\n", - " SagemakerEvaluation-LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c \n", - " 6e9 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:22:02]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found existing pipeline: \u001b]8;id=729179;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=511166;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#199\u001b\\\u001b[2m199\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-LLMAJEvaluation-\u001b[93mf952b79f-4afe-4f2f-b45d-17894533c\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m6e9\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Updating pipeline execution.py:202\n", - " SagemakerEvaluation-LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c \n", - " 6e9 with latest definition \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Updating pipeline \u001b]8;id=567297;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=249002;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#202\u001b\\\u001b[2m202\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-LLMAJEvaluation-\u001b[93mf952b79f-4afe-4f2f-b45d-17894533c\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m6e9\u001b[0m with latest definition \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Updating pipeline resource. resources.py:30306\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Updating pipeline resource. \u001b]8;id=897054;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=497721;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py#30306\u001b\\\u001b[2m30306\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:22:03] INFO Successfully updated pipeline: execution.py:208\n", - " SagemakerEvaluation-LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c \n", - " 6e9 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:22:03]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Successfully updated pipeline: \u001b]8;id=916795;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=385336;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#208\u001b\\\u001b[2m208\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-LLMAJEvaluation-\u001b[93mf952b79f-4afe-4f2f-b45d-17894533c\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m6e9\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Starting pipeline execution: eval-meta-04295d90-1764462123 execution.py:263\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Starting pipeline execution: eval-meta-04295d90-\u001b[1;36m1764462123\u001b[0m \u001b]8;id=41189;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=464412;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#263\u001b\\\u001b[2m263\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Pipeline execution started: execution.py:274\n", - " arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \n", - " -LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c6e9/execution/m318n \n", - " ngjk32f \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Pipeline execution started: \u001b]8;id=227887;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=844359;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#274\u001b\\\u001b[2m274\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -LLMAJEvaluation-\u001b[93mf952b79f-4afe-4f2f-b45d-17894533c6e9\u001b[0m/execution/m318n \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m ngjk32f \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "✅ Evaluation job started!\n", - "Job ARN: arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c6e9/execution/m318nngjk32f\n", - "Job Name: eval-meta-04295d90\n", - "Status: Executing\n" - ] - }, - { - "data": { - "text/html": [ - "
LLMAJEvaluationExecution(\n", - "│ arn='arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c6e9/execution/m318nngjk32f',\n", - "│ name='eval-meta-04295d90',\n", - "│ status=PipelineExecutionStatus(overall_status='Executing', step_details=[], failure_reason=None),\n", - "│ last_modified_time=datetime.datetime(2025, 11, 29, 16, 22, 3, 689000, tzinfo=tzlocal()),\n", - "│ eval_type=<EvalType.LLM_AS_JUDGE: 'llmasjudge'>,\n", - "│ s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n", - "│ steps=[]\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mLLMAJEvaluationExecution\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c6e9/execution/m318nngjk32f'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'eval-meta-04295d90'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m, \u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mlast_modified_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m29\u001b[0m, \u001b[1;36m16\u001b[0m, \u001b[1;36m22\u001b[0m, \u001b[1;36m3\u001b[0m, \u001b[1;36m689000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ 
\u001b[0m\u001b[38;2;215;175;0meval_type\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225mEvalType.LLM_AS_JUDGE:\u001b[0m\u001b[39m \u001b[0m\u001b[38;2;0;135;0m'llmasjudge'\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m=\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msteps\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Run evaluation\n", "execution = evaluator.evaluate()\n", @@ -1481,49 +282,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
PipelineExecutionStatus(\n", - "│ overall_status='Executing',\n", - "│ step_details=[\n", - "│ │ StepDetail(\n", - "│ │ │ name='CreateEvaluationAction',\n", - "│ │ │ status='Starting',\n", - "│ │ │ start_time='2025-11-29T16:22:04.148000-08:00',\n", - "│ │ │ end_time='<sagemaker.core.utils.utils.Unassigned object at 0x1298e7170>',\n", - "│ │ │ display_name=None,\n", - "│ │ │ failure_reason=None\n", - "│ │ )\n", - "│ ],\n", - "│ failure_reason=None\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'CreateEvaluationAction'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Starting'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T16:22:04.148000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'\u001b[0m\u001b[1;38;2;0;135;0m<\u001b[0m\u001b[1;38;2;0;135;0msagemaker.core.utils.utils.Unassigned\u001b[0m\u001b[38;2;0;135;0m object at 0x1298e7170\u001b[0m\u001b[1;38;2;0;135;0m>\u001b[0m\u001b[38;2;0;135;0m'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ 
\u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Refresh status\n", "execution.refresh()\n", @@ -1543,58 +304,9 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
╭─────────────────────────────────────────── Pipeline Execution Status ───────────────────────────────────────────╮\n", - "│ Overall Status Succeeded │\n", - "│ Target Status Succeeded │\n", - "│ Elapsed Time 1885.8s │\n", - "│ │\n", - "│ Pipeline Steps │\n", - "│ Step Name Status Duration │\n", - "│ AssociateLineage Succeeded 1.9s │\n", - "│ EvaluateCustomModelMetrics Succeeded 1327.1s │\n", - "│ EvaluateCustomInferenceModel Succeeded 554.1s │\n", - "│ CreateEvaluationAction Succeeded 4.5s │\n", - "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001b[34m╭─\u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34mPipeline Execution Status\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mOverall Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;37mSucceeded\u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mTarget Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;37mSucceeded\u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mElapsed Time \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[37m1885.8s \u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35mPipeline Steps\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mStep Name \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35mStatus \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35mDuration \u001b[0m\u001b[1;35m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mAssociateLineage 
\u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m1.9s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mEvaluateCustomModelMetrics \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m1327.1s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mEvaluateCustomInferenceModel \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m554.1s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mCreateEvaluationAction \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m4.5s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:53:37] INFO Final Resource Status: Succeeded execution.py:979\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:53:37]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Final Resource Status: Succeeded \u001b]8;id=524139;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=278480;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#979\u001b\\\u001b[2m979\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Wait for job completion (optional)\n", "# This will poll every 5 seconds for up to 1 hour\n", @@ -1603,512 +315,9 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 17:02:07] INFO Extracted training job name: show_results_utils.py:52\n", - " pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955 from \n", - " step: EvaluateCustomModelMetrics (priority: Custom) \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 17:02:07]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted training job name: \u001b]8;id=177834;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=168478;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#52\u001b\\\u001b[2m52\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955 from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m step: EvaluateCustomModelMetrics \u001b[1m(\u001b[0mpriority: Custom\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
╭─────────────────────────────────────────── Result Artifacts Location ───────────────────────────────────────────╮\n", - "│ │\n", - "│ │\n", - "│ 📦 Full evaluation artifacts available at: │\n", - "│ s3://mufi-test-serverless-smtj/eval/pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955/ │\n", - "│ │\n", - "│ │\n", - "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001b[34m╭─\u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34mResult Artifacts Location\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m📦 \u001b[0m\u001b[1mFull evaluation artifacts available at:\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m s3://mufi-test-serverless-smtj/eval/pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955/\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO S3 bucket: mufi-test-serverless-smtj, prefix: eval show_results_utils.py:341\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m S3 bucket: mufi-test-serverless-smtj, prefix: eval \u001b]8;id=453165;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=425984;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#341\u001b\\\u001b[2m341\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Extracted training job name: show_results_utils.py:52\n", - " pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955 from \n", - " step: EvaluateCustomModelMetrics (priority: Custom) \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted training job name: \u001b]8;id=324161;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=683512;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#52\u001b\\\u001b[2m52\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955 from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m step: EvaluateCustomModelMetrics \u001b[1m(\u001b[0mpriority: Custom\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Searching for bedrock summary in show_results_utils.py:361\n", - " s3://mufi-test-serverless-smtj/eval/pipelines-m318nngjk32f-E \n", - " valuateCustomModelM-lN73ONZ955/output/output/ \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for bedrock summary in \u001b]8;id=308182;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=660550;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#361\u001b\\\u001b[2m361\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/pipelines-m318nngjk32f-E\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mvaluateCustomModelM-lN73ONZ955/output/output/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Found bedrock job name: custom-llmaj-eval-m318nngjk32f show_results_utils.py:377\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found bedrock job name: custom-llmaj-eval-m318nngjk32f \u001b]8;id=705765;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=855376;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#377\u001b\\\u001b[2m377\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Searching for JSONL in show_results_utils.py:387\n", - " s3://mufi-test-serverless-smtj/eval/custom-llmaj-eval-m318nn \n", - " gjk32f/ \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for JSONL in \u001b]8;id=236968;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=874421;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#387\u001b\\\u001b[2m387\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/custom-llmaj-eval-m318nn\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mgjk32f/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Found JSONL: show_results_utils.py:405\n", - " eval/custom-llmaj-eval-m318nngjk32f/ld39q6di74sg/models/mode \n", - " l/taskTypes/General/datasets/CustomDataset/4a22339b-b5b1-421 \n", - " 4-9c1e-0c0bf2c71fd6_output.jsonl \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found JSONL: \u001b]8;id=648967;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=247115;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#405\u001b\\\u001b[2m405\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m eval/custom-llmaj-eval-m318nngjk32f/ld39q6di74sg/models/mode \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m l/taskTypes/General/datasets/CustomDataset/\u001b[93m4a22339b-b5b1-421\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m4-9c1e-0c0bf2c71fd6\u001b[0m_output.jsonl \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Found results file: show_results_utils.py:413\n", - " eval/custom-llmaj-eval-m318nngjk32f/ld39q6di74sg/models/mode \n", - " l/taskTypes/General/datasets/CustomDataset/4a22339b-b5b1-421 \n", - " 4-9c1e-0c0bf2c71fd6_output.jsonl \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found results file: \u001b]8;id=234223;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=249361;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#413\u001b\\\u001b[2m413\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m eval/custom-llmaj-eval-m318nngjk32f/ld39q6di74sg/models/mode \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m l/taskTypes/General/datasets/CustomDataset/\u001b[93m4a22339b-b5b1-421\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m4-9c1e-0c0bf2c71fd6\u001b[0m_output.jsonl \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Loaded 3 evaluation results show_results_utils.py:429\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Loaded \u001b[1;36m3\u001b[0m evaluation results \u001b]8;id=139737;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=460642;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#429\u001b\\\u001b[2m429\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
- "═══ Evaluation 1 of 3 ═══\n",
- "\n",
- "\n"
- ],
- "text/plain": [
- "\n",
- "\u001b[1;36m═══ Evaluation 1 of 3 ═══\u001b[0m\n",
- "\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "Prompt: What is the next number in this series? 1, 2, 4, 8, 16, ?\n", - "\n" - ], - "text/plain": [ - "\u001b[1mPrompt:\u001b[0m What is the next number in this series? \u001b[1;36m1\u001b[0m, \u001b[1;36m2\u001b[0m, \u001b[1;36m4\u001b[0m, \u001b[1;36m8\u001b[0m, \u001b[1;36m16\u001b[0m, ?\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Model Response: The next number in the series is 32.\n", - "\n" - ], - "text/plain": [ - "\u001b[1mModel Response:\u001b[0m The next number in the series is \u001b[1;36m32\u001b[0m.\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - " Metric Score \n", - " ───────────────────────────────────────────── \n", - " Builtin.Completeness 100.0% \n", - " Builtin.Faithfulness 100.0% \n", - " \n", - "\n" - ], - "text/plain": [ - " \n", - " \u001b[1;35m \u001b[0m\u001b[1;35mMetric \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35m Score\u001b[0m\u001b[1;35m \u001b[0m \n", - " ───────────────────────────────────────────── \n", - " \u001b[36m \u001b[0m\u001b[36mBuiltin.Completeness \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[32m 100.0%\u001b[0m\u001b[32m \u001b[0m \n", - " \u001b[36m \u001b[0m\u001b[36mBuiltin.Faithfulness \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[32m 100.0%\u001b[0m\u001b[32m \u001b[0m \n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
- "═══ Evaluation 2 of 3 ═══\n",
- "\n",
- "\n"
- ],
- "text/plain": [
- "\n",
- "\u001b[1;36m═══ Evaluation 2 of 3 ═══\u001b[0m\n",
- "\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "Prompt: What is the symbol that ends the sentence as a question\n",
- "\n"
- ],
- "text/plain": [
- "\u001b[1mPrompt:\u001b[0m What is the symbol that ends the sentence as a question\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "Model Response: The symbol that ends the sentence as a question is: ?\n",
- "\n"
- ],
- "text/plain": [
- "\u001b[1mModel Response:\u001b[0m The symbol that ends the sentence as a question is: ?\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n", - "\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - " Metric Score \n", - " ───────────────────────────────────────────── \n", - " Builtin.Completeness 100.0% \n", - " Builtin.Faithfulness 100.0% \n", - " \n", - "\n" - ], - "text/plain": [ - " \n", - " \u001b[1;35m \u001b[0m\u001b[1;35mMetric \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35m Score\u001b[0m\u001b[1;35m \u001b[0m \n", - " ───────────────────────────────────────────── \n", - " \u001b[36m \u001b[0m\u001b[36mBuiltin.Completeness \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[32m 100.0%\u001b[0m\u001b[32m \u001b[0m \n", - " \u001b[36m \u001b[0m\u001b[36mBuiltin.Faithfulness \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[32m 100.0%\u001b[0m\u001b[32m \u001b[0m \n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
- "═══ Evaluation 3 of 3 ═══\n",
- "\n",
- "\n"
- ],
- "text/plain": [
- "\n",
- "\u001b[1;36m═══ Evaluation 3 of 3 ═══\u001b[0m\n",
- "\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "Prompt: Repeat only the last two words of the following: I ate a hamburger today and it was kind of dry\n",
- "\n"
- ],
- "text/plain": [
- "\u001b[1mPrompt:\u001b[0m Repeat only the last two words of the following: I ate a hamburger today and it was kind of dry\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "Model Response: I ate a hamburger today and it was kind of dry.\n",
- "\n"
- ],
- "text/plain": [
- "\u001b[1mModel Response:\u001b[0m I ate a hamburger today and it was kind of dry.\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n", - "\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - " Metric Score \n", - " ───────────────────────────────────────────── \n", - " Builtin.Completeness 0.0% \n", - " Builtin.Faithfulness 0.0% \n", - " \n", - "\n" - ], - "text/plain": [ - " \n", - " \u001b[1;35m \u001b[0m\u001b[1;35mMetric \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35m Score\u001b[0m\u001b[1;35m \u001b[0m \n", - " ───────────────────────────────────────────── \n", - " \u001b[36m \u001b[0m\u001b[36mBuiltin.Completeness \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[32m 0.0%\u001b[0m\u001b[32m \u001b[0m \n", - " \u001b[36m \u001b[0m\u001b[36mBuiltin.Faithfulness \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[32m 0.0%\u001b[0m\u001b[32m \u001b[0m \n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
══════════════════════════════════════════════════════════════════════\n", - "\n" - ], - "text/plain": [ - "══════════════════════════════════════════════════════════════════════\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Showing evaluations 1-3 of 3\n", - "\n", - "\n" - ], - "text/plain": [ - "\u001b[1;36mShowing evaluations \u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;36m-\u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;36m of \u001b[0m\u001b[1;36m3\u001b[0m\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
══════════════════════════════════════════════════════════════════════\n", - "\n" - ], - "text/plain": [ - "══════════════════════════════════════════════════════════════════════\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Display results\n", "execution.show_results(limit=10, offset=0, show_explanations=False)" @@ -2127,216 +336,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 17:02:15] WARNING Could not extract eval_type from ARN: execution.py:146\n", - " arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \n", - " -llmasjudge/execution/4hr7446yft1d \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 17:02:15]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m Could not extract eval_type from ARN: \u001b]8;id=315627;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=953607;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#146\u001b\\\u001b[2m146\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -llmasjudge/execution/4hr7446yft1d \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Extracted s3_output_path from training job execution.py:367\n", - " pipelines-4hr7446yft1d-EvaluateCustomModelM-qePWbkcMxz: \n", - " s3://mufi-test-serverless-smtj/eval \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted s3_output_path from training job \u001b]8;id=739992;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=203397;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#367\u001b\\\u001b[2m367\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-4hr7446yft1d-EvaluateCustomModelM-qePWbkcMxz: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/\u001b[0m\u001b[38;2;225;0;225meval\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
WARNING Could not extract eval_type from ARN: execution.py:146\n", - " arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \n", - " -llmasjudge \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m Could not extract eval_type from ARN: \u001b]8;id=550335;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=858100;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#146\u001b\\\u001b[2m146\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -llmasjudge \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
WARNING Could not extract eval_type from ARN: execution.py:146\n", - " arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \n", - " -llmasjudge/execution/4hr7446yft1d \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m Could not extract eval_type from ARN: \u001b]8;id=379628;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=725705;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#146\u001b\\\u001b[2m146\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -llmasjudge/execution/4hr7446yft1d \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
PipelineExecutionStatus(\n", - "│ overall_status='Succeeded',\n", - "│ step_details=[\n", - "│ │ StepDetail(\n", - "│ │ │ name='AssociateLineage',\n", - "│ │ │ status='Succeeded',\n", - "│ │ │ start_time='2025-11-19T15:45:57.889000-08:00',\n", - "│ │ │ end_time='2025-11-19T15:45:59.266000-08:00',\n", - "│ │ │ display_name=None,\n", - "│ │ │ failure_reason=None\n", - "│ │ ),\n", - "│ │ StepDetail(\n", - "│ │ │ name='EvaluateCustomModelMetrics',\n", - "│ │ │ status='Succeeded',\n", - "│ │ │ start_time='2025-11-19T15:27:55.641000-08:00',\n", - "│ │ │ end_time='2025-11-19T15:45:56.749000-08:00',\n", - "│ │ │ display_name=None,\n", - "│ │ │ failure_reason=None\n", - "│ │ ),\n", - "│ │ StepDetail(\n", - "│ │ │ name='EvaluateCustomInferenceModel',\n", - "│ │ │ status='Succeeded',\n", - "│ │ │ start_time='2025-11-19T15:18:07.804000-08:00',\n", - "│ │ │ end_time='2025-11-19T15:27:54.474000-08:00',\n", - "│ │ │ display_name=None,\n", - "│ │ │ failure_reason=None\n", - "│ │ ),\n", - "│ │ StepDetail(\n", - "│ │ │ name='CreateEvaluationAction',\n", - "│ │ │ status='Succeeded',\n", - "│ │ │ start_time='2025-11-19T15:18:05.550000-08:00',\n", - "│ │ │ end_time='2025-11-19T15:18:07.332000-08:00',\n", - "│ │ │ display_name=None,\n", - "│ │ │ failure_reason=None\n", - "│ │ )\n", - "│ ],\n", - "│ failure_reason=None\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'AssociateLineage'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - 
"\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:45:57.889000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:45:59.266000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'EvaluateCustomModelMetrics'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:27:55.641000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:45:56.749000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'EvaluateCustomInferenceModel'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:18:07.804000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ 
\u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:27:54.474000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'CreateEvaluationAction'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:18:05.550000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:18:07.332000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n", - "│ in <module>:17 │\n", - "│ │\n", - "│ 14 ) │\n", - "│ 15 pprint(existing_execution.status) │\n", - "│ 16 │\n", - "│ ❱ 17 existing_execution.show_results(limit=5, offset=0, show_explanations=False) │\n", - "│ 18 │\n", - "│ │\n", - "│ /Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/pydantic/main │\n", - "│ .py:1026 in __getattr__ │\n", - "│ │\n", - "│ 1023 │ │ │ │ │ │ return super().__getattribute__(item) # Raises AttributeError i │\n", - "│ 1024 │ │ │ │ │ else: │\n", - "│ 1025 │ │ │ │ │ │ # this is the current error │\n", - "│ ❱ 1026 │ │ │ │ │ │ raise AttributeError(f'{type(self).__name__!r} object has no att │\n", - "│ 1027 │ │ │\n", - "│ 1028 │ │ def __setattr__(self, name: str, value: Any) -> None: │\n", - "│ 1029 │ │ │ if (setattr_handler := self.__pydantic_setattr_handlers__.get(name)) is not │\n", - "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n", - "AttributeError: 'EvaluationPipelineExecution' object has no attribute 'show_results'\n", - "\n" - ], - "text/plain": [ - "\u001b[38;2;255;0;0m╭─\u001b[0m\u001b[38;2;255;0;0m──────────────────────────────\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[1;38;2;255;0;0mTraceback \u001b[0m\u001b[1;2;38;2;255;0;0m(most recent call last)\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[38;2;255;0;0m───────────────────────────────\u001b[0m\u001b[38;2;255;0;0m─╮\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m in \u001b[92m
[11/29/25 17:02:21] INFO Extracted s3_output_path from training job execution.py:367\n", - " pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955: \n", - " s3://mufi-test-serverless-smtj/eval \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 17:02:21]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted s3_output_path from training job \u001b]8;id=802368;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=75226;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#367\u001b\\\u001b[2m367\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/\u001b[0m\u001b[38;2;225;0;225meval\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found 2 LLM-as-Judge evaluation jobs\n", - " - m318nngjk32f: Succeeded\n", - " - 2m5hczli7vdp: Failed\n" - ] - } - ], + "outputs": [], "source": [ "from sagemaker.train.evaluate import LLMAsJudgeEvaluator\n", "\n", diff --git a/v3-examples/inference-examples/optimize-example.ipynb b/v3-examples/inference-examples/optimize-example.ipynb index 4ad4f3b513..dfb28af984 100644 --- a/v3-examples/inference-examples/optimize-example.ipynb +++ b/v3-examples/inference-examples/optimize-example.ipynb @@ -54,8 +54,8 @@ "MODEL_ID = \"meta-textgeneration-llama-3-8b-instruct\"\n", "MODEL_NAME_PREFIX = \"jumpstart-optimize-example\"\n", "ENDPOINT_NAME_PREFIX = \"jumpstart-optimize-example-endpoint\"\n", - "AWS_ACCOUNT_ID = \"593793038179\"\n", - "AWS_REGION = \"us-east-2\"\n", + "AWS_ACCOUNT_ID = 
Session().account_id()\n", + 
"AWS_REGION = Session().boto_region_name\n", "\n", "# Generate unique identifiers\n", "unique_id = str(uuid.uuid4())[:8]\n", diff --git a/v3-examples/inference-examples/train-inference-e2e-example.ipynb b/v3-examples/inference-examples/train-inference-e2e-example.ipynb index 7a9d45e476..ee9b0ac0c7 100644 --- a/v3-examples/inference-examples/train-inference-e2e-example.ipynb +++ b/v3-examples/inference-examples/train-inference-e2e-example.ipynb @@ -61,7 +61,7 @@ "TRAINING_JOB_PREFIX = \"e2e-v3-pytorch\"\n", "\n", "# AWS Configuration\n", - "AWS_REGION = \"us-west-2\"\n", + "AWS_REGION = Session().boto_region_name\n", "PYTORCH_TRAINING_IMAGE = f\"763104351884.dkr.ecr.{AWS_REGION}.amazonaws.com/pytorch-training:1.13.1-cpu-py39\"\n", "\n", "# Generate unique identifiers\n", diff --git a/v3-examples/ml-ops-examples/v3-mlflow-train-inference-e2e-example.ipynb b/v3-examples/ml-ops-examples/v3-mlflow-train-inference-e2e-example.ipynb index 0acd6fdf54..80435c9325 100644 --- a/v3-examples/ml-ops-examples/v3-mlflow-train-inference-e2e-example.ipynb +++ b/v3-examples/ml-ops-examples/v3-mlflow-train-inference-e2e-example.ipynb @@ -34,8 +34,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Install from local SDK for development (includes fixes for MLflow path resolution issues)\n", - "%pip install -e ../../sagemaker-core -e ../../sagemaker-train -e ../../sagemaker-serve -e ../../sagemaker-mlops -e ../../. 
\"mlflow==3.4.0\" --upgrade" + "# Install fix for MLflow path resolution issues\n", + "%pip install mlflow==3.4.0" ] }, { @@ -62,6 +62,7 @@ "source": [ "import uuid\n", "from sagemaker.core import image_uris\n", + "from sagemaker.core.helper.session_helper import Session\n", "\n", "# =============================================================================\n", "# MLflow Configuration - UPDATE THIS WITH YOUR TRACKING SERVER ARN\n", @@ -70,7 +71,7 @@ "MLFLOW_TRACKING_ARN = \"XXXXX\"\n", "\n", "# AWS Configuration\n", - "AWS_REGION = \"us-east-1\"\n", + "AWS_REGION = Session().boto_region_name\n", "\n", "# Get PyTorch training image dynamically\n", "PYTORCH_TRAINING_IMAGE = image_uris.retrieve(\n", @@ -556,7 +557,7 @@ ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "py3.10.14", "language": "python", "name": "python3" }, @@ -570,7 +571,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.14" + "version": "3.10.14" } }, "nbformat": 4, diff --git a/v3-examples/ml-ops-examples/v3-model-registry-example/v3-model-registry-example.ipynb b/v3-examples/ml-ops-examples/v3-model-registry-example/v3-model-registry-example.ipynb index d307261bbf..6dc3427047 100644 --- a/v3-examples/ml-ops-examples/v3-model-registry-example/v3-model-registry-example.ipynb +++ b/v3-examples/ml-ops-examples/v3-model-registry-example/v3-model-registry-example.ipynb @@ -23,15 +23,17 @@ "from sagemaker.core import image_uris\n", "import boto3\n", "\n", + "sagemaker_session = Session()\n", + "role = get_execution_role()\n", + "region = sagemaker_session.boto_region_name\n", + "\n", "image_uri = image_uris.retrieve(\n", " framework=\"xgboost\",\n", - " region=\"us-east-1\",\n", + " region=region,\n", " version=\"1.0-1\",\n", " py_version=\"py3\",\n", " instance_type=\"ml.m5.xlarge\",\n", - ")\n", - "sagemaker_session = Session()\n", - "role = get_execution_role()" + ")" ] }, { @@ -181,7 +183,7 @@ "# approve the version 
before creating model\n", "\n", "# There is a gap that API response for a versioned model package doesn't include model_package_name\n", - "sagemaker_client = boto3.client('sagemaker', region_name='us-east-1')\n", + "sagemaker_client = boto3.client('sagemaker', region_name=region)\n", "sagemaker_client.update_model_package(\n", " ModelPackageArn=registered_model_package_arn,\n", " ModelApprovalStatus=\"Approved\"\n", diff --git a/v3-examples/model-customization-examples/sm-studio-nova-training-job-sample-notebook.ipynb b/v3-examples/model-customization-examples/sm-studio-nova-training-job-sample-notebook.ipynb index 20c51e562e..4e49266323 100644 --- a/v3-examples/model-customization-examples/sm-studio-nova-training-job-sample-notebook.ipynb +++ b/v3-examples/model-customization-examples/sm-studio-nova-training-job-sample-notebook.ipynb @@ -45,16 +45,6 @@ "## Setup and Dependencies" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "84cf410f", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install --upgrade sagemaker --quiet # restart the kernel after running this cell" - ] - }, { "cell_type": "code", "execution_count": null, diff --git a/v3-examples/sagemaker_v3_setup.ipynb b/v3-examples/sagemaker_v3_setup.ipynb new file mode 100644 index 0000000000..6e25fb5acb --- /dev/null +++ b/v3-examples/sagemaker_v3_setup.ipynb @@ -0,0 +1,183 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# SageMaker Python SDK v3+ Setup\n", + "\n", + "This notebook helps you upgrade to SageMaker Python SDK v3+ and verify the installation.\n", + "\n", + "**⚠️ Important:** After running this notebook, restart your kernel before using SageMaker v3." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1: Check Current Version" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip show sagemaker | grep Version" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2: Install/Upgrade\n", + "\n", + "Choose **ONE** of the following methods:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Method 1: Standard Upgrade (Try this first)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --upgrade sagemaker" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Method 2: Force Reinstall (If Method 1 fails)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --force-reinstall --no-cache-dir sagemaker" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Method 3: Clean Uninstall + Reinstall (If residual files exist)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip uninstall -y sagemaker sagemaker-core sagemaker-train sagemaker-serve sagemaker-mlops\n", + "%pip cache purge\n", + "%pip install sagemaker" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 3: Restart Kernel\n", + "\n", + "**⚠️ REQUIRED:** Click **Kernel → Restart Kernel** in the menu above, then continue to Step 4." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 4: Verify Installation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip show sagemaker | grep Version" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 5: Verify Core Components" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " from sagemaker.core.helper.session_helper import Session\n", + " from sagemaker.train import ModelTrainer\n", + " from sagemaker.serve import ModelBuilder\n", + " from sagemaker.mlops.workflow.pipeline import Pipeline\n", + " print(\"✓ Core v3 modules imported successfully\")\n", + " \n", + " session = Session()\n", + " print(f\"✓ Session created - Region: {session.boto_region_name}\")\n", + " print(f\"✓ Default bucket: {session.default_bucket()}\")\n", + " \n", + "except ImportError as e:\n", + " print(f\"✗ Import failed: {e}\")\n", + " print(\"Try Method 3 (Clean Uninstall + Reinstall)\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Troubleshooting\n", + "\n", + "### Version still shows v2.x after upgrade\n", + "1. Restart kernel (Kernel → Restart Kernel)\n", + "2. Use Method 3 (Clean Uninstall + Reinstall)\n", + "\n", + "### Import errors after upgrade\n", + "Clear Python cache and restart kernel:\n", + "```python\n", + "!find . -type d -name __pycache__ -exec rm -r {} +\n", + "!find . 
-type f -name '*.pyc' -delete\n", + "```" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.1" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/v3-examples/training-examples/distributed-local-training-example.ipynb b/v3-examples/training-examples/distributed-local-training-example.ipynb index 3c607979a4..ece9f5dbd8 100644 --- a/v3-examples/training-examples/distributed-local-training-example.ipynb +++ b/v3-examples/training-examples/distributed-local-training-example.ipynb @@ -6,7 +6,7 @@ "source": [ "# SageMaker V3 Distributed Local Training Example\n", "\n", - "This notebook demonstrates how to run distributed training locally using SageMaker V3 ModelTrainer with multiple Docker containers." + "This notebook demonstrates how to run distributed training locally using SageMaker V3 ModelTrainer with multiple Docker containers. **Note:** This notebook will not run in SageMaker Studio." ] }, { diff --git a/v3-examples/training-examples/local-training-example.ipynb b/v3-examples/training-examples/local-training-example.ipynb index 0c2f09ccfe..a279378e49 100644 --- a/v3-examples/training-examples/local-training-example.ipynb +++ b/v3-examples/training-examples/local-training-example.ipynb @@ -6,7 +6,8 @@ "source": [ "# SageMaker V3 Local Training Example\n", "\n", - "This notebook demonstrates how to use SageMaker V3 ModelTrainer in Local Container mode for testing training jobs in Docker containers locally." + "This notebook demonstrates how to use SageMaker V3 ModelTrainer in Local Container mode for testing training jobs in Docker containers locally.\n", + "**Note:** This notebook will not run in SageMaker Studio." 
] }, {