diff --git a/sagemaker-core/example_notebooks/get_started.ipynb b/sagemaker-core/example_notebooks/get_started.ipynb index ee1a29e412..e3d5b9daf6 100644 --- a/sagemaker-core/example_notebooks/get_started.ipynb +++ b/sagemaker-core/example_notebooks/get_started.ipynb @@ -187,7 +187,13 @@ "metadata": {}, "outputs": [], "source": [ - "image = '141502667606.dkr.ecr.eu-west-1.amazonaws.com/sagemaker-xgboost:1.7-1'" + "from sagemaker.core import image_uris\n", + "\n", + "image = image_uris.retrieve(\n", + " framework=\"xgboost\",\n", + " region=region,\n", + " version='1.7-1'\n", + ")" ] }, { @@ -687,7 +693,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "temp_env", "language": "python", "name": "python3" }, diff --git a/sagemaker-core/example_notebooks/inference_and_resource_chaining.ipynb b/sagemaker-core/example_notebooks/inference_and_resource_chaining.ipynb index 0a410056a5..06c03d8765 100644 --- a/sagemaker-core/example_notebooks/inference_and_resource_chaining.ipynb +++ b/sagemaker-core/example_notebooks/inference_and_resource_chaining.ipynb @@ -241,11 +241,13 @@ "metadata": {}, "outputs": [], "source": [ - "# Image name is hardcoded here\n", - "# Image name can be programatically got by using sagemaker package and calling image_uris.retrieve\n", - "# Since that is a high level abstraction that has multiple dependencies, the image URIs functionalities will live in sagemaker (V2)\n", + "from sagemaker.core import image_uris\n", "\n", - "image = \"433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest\"" + "image = image_uris.retrieve(\n", + " framework=\"xgboost\",\n", + " region=region,\n", + " version='latest'\n", + ")" ] }, { diff --git a/sagemaker-core/example_notebooks/intelligent_defaults_and_logging.ipynb b/sagemaker-core/example_notebooks/intelligent_defaults_and_logging.ipynb index ff3bdeda2f..aabdaba7d8 100644 --- a/sagemaker-core/example_notebooks/intelligent_defaults_and_logging.ipynb +++ 
b/sagemaker-core/example_notebooks/intelligent_defaults_and_logging.ipynb @@ -269,11 +269,13 @@ "metadata": {}, "outputs": [], "source": [ - "# Image name is hardcoded here\n", - "# Image name can be programatically got by using sagemaker package and calling image_uris.retrieve\n", - "# Since that is a high level abstraction that has multiple dependencies, the image URIs functionalities will live in sagemaker (V2)\n", + "from sagemaker.core import image_uris\n", "\n", - "image = \"433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest\"" + "image = image_uris.retrieve(\n", + " framework=\"xgboost\",\n", + " region=region,\n", + " version='latest'\n", + ")" ] }, { diff --git a/sagemaker-core/example_notebooks/sagemaker_core_overview.ipynb b/sagemaker-core/example_notebooks/sagemaker_core_overview.ipynb index 819a65c3ec..f68658ea4c 100644 --- a/sagemaker-core/example_notebooks/sagemaker_core_overview.ipynb +++ b/sagemaker-core/example_notebooks/sagemaker_core_overview.ipynb @@ -255,11 +255,13 @@ "metadata": {}, "outputs": [], "source": [ - "# Image name is hardcoded here\n", - "# Image name can be programatically got by using sagemaker package and calling image_uris.retrieve\n", - "# Since that is a high level abstraction that has multiple dependencies, the image URIs functionalities will live in sagemaker (V2)\n", + "from sagemaker.core import image_uris\n", "\n", - "image = \"433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest\"" + "image = image_uris.retrieve(\n", + " framework=\"xgboost\",\n", + " region=region,\n", + " version='latest'\n", + ")" ] }, { diff --git a/sagemaker-train/example_notebooks/evaluate/benchmark_demo.ipynb b/sagemaker-train/example_notebooks/evaluate/benchmark_demo.ipynb index 5cb75f506c..e0133a9272 100644 --- a/sagemaker-train/example_notebooks/evaluate/benchmark_demo.ipynb +++ b/sagemaker-train/example_notebooks/evaluate/benchmark_demo.ipynb @@ -21,74 +21,9 @@ }, { "cell_type": "code", - "execution_count": 2, + 
"execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[\n",
-       "<_Benchmark.MMLU: 'mmlu'>,\n",
-       "<_Benchmark.MMLU_PRO: 'mmlu_pro'>,\n",
-       "<_Benchmark.BBH: 'bbh'>,\n",
-       "<_Benchmark.GPQA: 'gpqa'>,\n",
-       "<_Benchmark.MATH: 'math'>,\n",
-       "<_Benchmark.STRONG_REJECT: 'strong_reject'>,\n",
-       "<_Benchmark.IFEVAL: 'ifeval'>,\n",
-       "<_Benchmark.GEN_QA: 'gen_qa'>,\n",
-       "<_Benchmark.MMMU: 'mmmu'>,\n",
-       "<_Benchmark.LLM_JUDGE: 'llm_judge'>,\n",
-       "<_Benchmark.INFERENCE_ONLY: 'inference_only'>\n",
-       "]\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225m_Benchmark.MMLU:\u001b[0m\u001b[39m \u001b[0m\u001b[38;2;0;135;0m'mmlu'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.MMLU_PRO: \u001b[0m\u001b[38;2;0;135;0m'mmlu_pro'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.BBH: \u001b[0m\u001b[38;2;0;135;0m'bbh'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.GPQA: \u001b[0m\u001b[38;2;0;135;0m'gpqa'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.MATH: \u001b[0m\u001b[38;2;0;135;0m'math'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.STRONG_REJECT: \u001b[0m\u001b[38;2;0;135;0m'strong_reject'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.IFEVAL: \u001b[0m\u001b[38;2;0;135;0m'ifeval'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.GEN_QA: \u001b[0m\u001b[38;2;0;135;0m'gen_qa'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.MMMU: \u001b[0m\u001b[38;2;0;135;0m'mmmu'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.LLM_JUDGE: \u001b[0m\u001b[38;2;0;135;0m'llm_judge'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.INFERENCE_ONLY: \u001b[0m\u001b[38;2;0;135;0m'inference_only'\u001b[0m\u001b[1m>\u001b[0m\n", - "\u001b[1m]\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
{\n",
-       "'modality': 'Multi-Modal (image)',\n",
-       "'description': 'Custom Dataset Evaluation – Lets you supply your own dataset for benchmarking, comparing model outputs to reference answers with metrics such as ROUGE and BLEU. gen_qa supports image inference for models which have multimodal support.',\n",
-       "'metrics': ['all'],\n",
-       "'strategy': 'gen_qa',\n",
-       "'subtask_available': False,\n",
-       "'subtasks': None\n",
-       "}\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'modality'\u001b[0m: \u001b[38;2;0;135;0m'Multi-Modal \u001b[0m\u001b[1;38;2;0;135;0m(\u001b[0m\u001b[38;2;0;135;0mimage\u001b[0m\u001b[1;38;2;0;135;0m)\u001b[0m\u001b[38;2;0;135;0m'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'description'\u001b[0m: \u001b[38;2;0;135;0m'Custom Dataset Evaluation – Lets you supply your own dataset for benchmarking, comparing model outputs to reference answers with metrics such as ROUGE and BLEU. gen_qa supports image inference for models which have multimodal support.'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'metrics'\u001b[0m: \u001b[1m[\u001b[0m\u001b[38;2;0;135;0m'all'\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'strategy'\u001b[0m: \u001b[38;2;0;135;0m'gen_qa'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'subtask_available'\u001b[0m: \u001b[3;38;2;215;0;0mFalse\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'subtasks'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[1m}\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "from sagemaker.train.evaluate import get_benchmarks, get_benchmark_properties\n", "from rich.pretty import pprint\n", @@ -134,108 +69,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:39:45] INFO     Found credentials in shared credentials file: ~/.aws/credentials   credentials.py:1364\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:39:45]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=314173;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=126855;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/botocore/credentials.py#1364\u001b\\\u001b[2m1364\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /Users/mufi/Library/Application Support/sagemaker/config.yaml\n" - ] - }, - { - "data": { - "text/html": [ - "
                    INFO     Resolved MLflow resource ARN:                                    base_evaluator.py:113\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/                      \n",
-       "                             mmlu-eval-experiment                                                                  \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved MLflow resource ARN: \u001b]8;id=480390;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=329695;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#113\u001b\\\u001b[2m113\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/ \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m mmlu-eval-experiment \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Model package group provided as ARN:                             base_evaluator.py:145\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package-group/exa                      \n",
-       "                             mple-name-aovqo                                                                       \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Model package group provided as ARN: \u001b]8;id=572070;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=299487;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#145\u001b\\\u001b[2m145\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-group/exa \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m mple-name-aovqo \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
BenchMarkEvaluator(\n",
-       "region=None,\n",
-       "sagemaker_session=<sagemaker.core.helper.session_helper.Session object at 0x13cd28e60>,\n",
-       "model='arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28',\n",
-       "base_eval_name='gen-qa-eval-demo',\n",
-       "s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n",
-       "mlflow_resource_arn='arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment',\n",
-       "mlflow_experiment_name=None,\n",
-       "mlflow_run_name=None,\n",
-       "networking=None,\n",
-       "kms_key_id=None,\n",
-       "model_package_group='arn:aws:sagemaker:us-west-2:052150106756:model-package-group/example-name-aovqo',\n",
-       "benchmark=<_Benchmark.GEN_QA: 'gen_qa'>,\n",
-       "subtasks=None,\n",
-       "dataset='s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl',\n",
-       "evaluate_base_model=True\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mBenchMarkEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mregion\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msagemaker_session\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225msagemaker.core.helper.session_helper.Session\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x13cd28e60\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbase_eval_name\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'gen-qa-eval-demo'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_resource_arn\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_experiment_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_run_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mnetworking\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mkms_key_id\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ 
\u001b[0m\u001b[38;2;215;175;0mmodel_package_group\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/example-name-aovqo'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbenchmark\u001b[0m\u001b[39m=<_Benchmark.GEN_QA: \u001b[0m\u001b[38;2;0;135;0m'gen_qa'\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msubtasks\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mdataset\u001b[0m=\u001b[38;2;0;135;0m's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mevaluate_base_model\u001b[0m=\u001b[3;38;2;0;135;0mTrue\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "from sagemaker.train.evaluate import BenchMarkEvaluator\n", "\n", @@ -260,43 +94,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n",
-       " in <module>:13                                                                                   \n",
-       "                                                                                                  \n",
-       "   10 # Create evaluator with GEN_QA benchmark                                                    \n",
-       "   11 # These values match our successfully tested configuration                                  \n",
-       "   12 evaluator = BenchMarkEvaluator(                                                             \n",
-       " 13 benchmark=Benchmark.GEN_QA,                                                             \n",
-       "   14 model=\"meta-textgeneration-llama-3-2-1b-instruct\",                                      \n",
-       "   15 s3_output_path=\"s3://mufi-test-serverless-smtj/eval/\",                                  \n",
-       "   16 mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server    \n",
-       "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-       "NameError: name 'Benchmark' is not defined\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[38;2;255;0;0m╭─\u001b[0m\u001b[38;2;255;0;0m──────────────────────────────\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[1;38;2;255;0;0mTraceback \u001b[0m\u001b[1;2;38;2;255;0;0m(most recent call last)\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[38;2;255;0;0m───────────────────────────────\u001b[0m\u001b[38;2;255;0;0m─╮\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m in :13 \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m10 \u001b[0m\u001b[2m# Create evaluator with GEN_QA benchmark\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m11 \u001b[0m\u001b[2m# These values match our successfully tested configuration\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m12 \u001b[0mevaluator = BenchMarkEvaluator( \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[31m❱ \u001b[0m13 \u001b[2m│ \u001b[0mbenchmark=\u001b[1;4mBenchmark\u001b[0m.GEN_QA, \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m14 \u001b[0m\u001b[2m│ \u001b[0mmodel=\u001b[33m\"\u001b[0m\u001b[33mmeta-textgeneration-llama-3-2-1b-instruct\u001b[0m\u001b[33m\"\u001b[0m, \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m15 \u001b[0m\u001b[2m│ \u001b[0ms3_output_path=\u001b[33m\"\u001b[0m\u001b[33ms3://mufi-test-serverless-smtj/eval/\u001b[0m\u001b[33m\"\u001b[0m, \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m16 \u001b[0m\u001b[2m│ \u001b[0mmlflow_resource_arn=\u001b[33m\"\u001b[0m\u001b[33marn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n", - "\u001b[1;91mNameError: \u001b[0mname 
\u001b[38;2;0;135;0m'Benchmark'\u001b[0m is not defined\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# # [Optional] BASE MODEL EVAL\n", "\n", @@ -323,61 +121,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO - botocore.credentials - Found credentials in shared credentials file: ~/.aws/credentials\n", - "INFO - sagemaker.modules.evaluate.base_evaluator - Model package group provided as ARN: arn:aws:sagemaker:us-east-1:052150106756:model-package-group/test-nova-finetuned-models\n" - ] - }, - { - "data": { - "text/html": [ - "
BenchMarkEvaluator(\n",
-       "region='us-east-1',\n",
-       "sagemaker_session=<sagemaker_core.helper.session_helper.Session object at 0x356a03950>,\n",
-       "model='arn:aws:sagemaker:us-east-1:052150106756:model-package/test-nova-finetuned-models/3',\n",
-       "base_eval_name='gen-qa-eval-demo',\n",
-       "s3_output_path='s3://mufi-test-serverless-iad/eval/',\n",
-       "mlflow_resource_arn='arn:aws:sagemaker:us-east-1:052150106756:mlflow-tracking-server/mlflow-prod-server',\n",
-       "mlflow_experiment_name=None,\n",
-       "mlflow_run_name=None,\n",
-       "networking=None,\n",
-       "kms_key_id=None,\n",
-       "model_package_group='arn:aws:sagemaker:us-east-1:052150106756:model-package-group/test-nova-finetuned-models',\n",
-       "benchmark=<_Benchmark.GEN_QA: 'gen_qa'>,\n",
-       "subtasks=None,\n",
-       "dataset='s3://sagemaker-us-east-1-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl',\n",
-       "evaluate_base_model=True\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mBenchMarkEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mregion\u001b[0m=\u001b[38;2;0;135;0m'us-east-1'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msagemaker_session\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225msagemaker_core.helper.session_helper.Session\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x356a03950\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-east-1:052150106756:model-package/test-nova-finetuned-models/3'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbase_eval_name\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'gen-qa-eval-demo'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m's3://mufi-test-serverless-iad/eval/'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_resource_arn\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-east-1:052150106756:mlflow-tracking-server/mlflow-prod-server'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_experiment_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_run_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mnetworking\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mkms_key_id\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ 
\u001b[0m\u001b[38;2;215;175;0mmodel_package_group\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-east-1:052150106756:model-package-group/test-nova-finetuned-models'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbenchmark\u001b[0m\u001b[39m=<_Benchmark.GEN_QA: \u001b[0m\u001b[38;2;0;135;0m'gen_qa'\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msubtasks\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mdataset\u001b[0m=\u001b[38;2;0;135;0m's3://sagemaker-us-east-1-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mevaluate_base_model\u001b[0m=\u001b[3;38;2;0;135;0mTrue\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# # [Optional] Nova testing IAD Prod\n", "\n", @@ -411,156 +155,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:26:31] INFO     SageMaker Python SDK will collect telemetry to help us better  telemetry_logging.py:91\n",
-       "                             understand our user's needs, diagnose issues, and deliver                             \n",
-       "                             additional features.                                                                  \n",
-       "                             To opt out of telemetry, please disable via TelemetryOptOut                           \n",
-       "                             parameter in SDK defaults config. For more information, refer                         \n",
-       "                             to                                                                                    \n",
-       "                             https://sagemaker.readthedocs.io/en/stable/overview.html#confi                        \n",
-       "                             guring-and-using-defaults-with-the-sagemaker-python-sdk.                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:26:31]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=665742;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=28065;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Fetching evaluation override parameters for hyperparameters benchmark_evaluator.py:495\n",
-       "                             property                                                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Fetching evaluation override parameters for hyperparameters \u001b]8;id=668827;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py\u001b\\\u001b[2mbenchmark_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=344195;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py#495\u001b\\\u001b[2m495\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m property \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Fetching hub content metadata for                                  recipe_utils.py:201\n",
-       "                             meta-textgeneration-llama-3-2-1b-instruct from SageMakerPublicHub                     \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Fetching hub content metadata for \u001b]8;id=912465;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=530916;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#201\u001b\\\u001b[2m201\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct from SageMakerPublicHub \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  No region provided. Using default region.                                 utils.py:340\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m No region provided. Using default region. \u001b]8;id=483608;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=394176;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py#340\u001b\\\u001b[2m340\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Runs on sagemaker us-west-2, region:us-west-2                             utils.py:354\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Runs on sagemaker us-west-\u001b[1;36m2\u001b[0m, region:us-west-\u001b[1;36m2\u001b[0m \u001b]8;id=127187;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=740445;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py#354\u001b\\\u001b[2m354\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Searching for evaluation recipe with Type='Evaluation' and         recipe_utils.py:221\n",
-       "                             EvaluationType='DeterministicEvaluation'                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for evaluation recipe with \u001b[38;2;215;175;0mType\u001b[0m=\u001b[38;2;0;135;0m'Evaluation'\u001b[0m and \u001b]8;id=26417;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=309515;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#221\u001b\\\u001b[2m221\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;215;175;0mEvaluationType\u001b[0m=\u001b[38;2;0;135;0m'DeterministicEvaluation'\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Downloading override parameters from                               recipe_utils.py:249\n",
-       "                             s3://jumpstart-cache-beta-us-west-2/recipes/open-source-eval-meta-                    \n",
-       "                             textgeneration-llama-3-2-1b-instruct-deterministic_override_params                    \n",
-       "                             _sm_jobs_v1.0.19.json                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Downloading override parameters from \u001b]8;id=762738;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=1149;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#249\u001b\\\u001b[2m249\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/jumpstart-cache-beta-us-west-2/recipes/\u001b[0m\u001b[38;2;225;0;225mopen-source-eval-meta-\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mtextgeneration-llama-3-2-1b-instruct-deterministic_override_params\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225m_sm_jobs_v1.0.19.json\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
{\n",
-       "'max_new_tokens': '8192',\n",
-       "'temperature': '0',\n",
-       "'top_k': '-1',\n",
-       "'top_p': '1.0',\n",
-       "'aggregation': '',\n",
-       "'postprocessing': 'False',\n",
-       "'max_model_len': '12000'\n",
-       "}\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b[38;2;0;135;0m'8192'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[38;2;0;135;0m'-1'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[38;2;0;135;0m'1.0'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'aggregation'\u001b[0m: \u001b[38;2;0;135;0m''\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'postprocessing'\u001b[0m: \u001b[38;2;0;135;0m'False'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'max_model_len'\u001b[0m: \u001b[38;2;0;135;0m'12000'\u001b[0m\n", - "\u001b[1m}\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "pprint(evaluator.hyperparameters.to_dict())\n", "\n", @@ -591,1114 +188,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:40:20] INFO     SageMaker Python SDK will collect telemetry to help us better  telemetry_logging.py:91\n",
-       "                             understand our user's needs, diagnose issues, and deliver                             \n",
-       "                             additional features.                                                                  \n",
-       "                             To opt out of telemetry, please disable via TelemetryOptOut                           \n",
-       "                             parameter in SDK defaults config. For more information, refer                         \n",
-       "                             to                                                                                    \n",
-       "                             https://sagemaker.readthedocs.io/en/stable/overview.html#confi                        \n",
-       "                             guring-and-using-defaults-with-the-sagemaker-python-sdk.                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:40:20]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=39435;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=899931;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Getting or creating artifact for source:                         base_evaluator.py:597\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine                      \n",
-       "                             tuned-models-gamma/28                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Getting or creating artifact for source: \u001b]8;id=774478;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=222956;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#597\u001b\\\u001b[2m597\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Searching for existing artifact for model package:               base_evaluator.py:459\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine                      \n",
-       "                             tuned-models-gamma/28                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for existing artifact for model package: \u001b]8;id=672788;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=533927;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#459\u001b\\\u001b[2m459\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Found existing artifact:                                         base_evaluator.py:468\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3                      \n",
-       "                             138877d772ec489bef                                                                    \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found existing artifact: \u001b]8;id=555230;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=311641;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#468\u001b\\\u001b[2m468\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3 \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 138877d772ec489bef \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Using resolved model_package_group ARN:                          base_evaluator.py:414\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package-group/exa                      \n",
-       "                             mple-name-aovqo                                                                       \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using resolved model_package_group ARN: \u001b]8;id=350625;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=393598;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#414\u001b\\\u001b[2m414\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-group/exa \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m mple-name-aovqo \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Using ModelPackage - model_package_group_arn:               benchmark_evaluator.py:644\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package-grou                           \n",
-       "                             p/example-name-aovqo                                                                  \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using ModelPackage - model_package_group_arn: \u001b]8;id=534430;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py\u001b\\\u001b[2mbenchmark_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=895229;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py#644\u001b\\\u001b[2m644\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-grou \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m p/example-name-aovqo \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Resolved model info - base_model_name:                      benchmark_evaluator.py:647\n",
-       "                             meta-textgeneration-llama-3-2-1b-instruct, base_model_arn:                            \n",
-       "                             arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublic                           \n",
-       "                             Hub/Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0,                           \n",
-       "                              source_model_package_arn:                                                            \n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package/test                           \n",
-       "                             -finetuned-models-gamma/28                                                            \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved model info - base_model_name: \u001b]8;id=1084;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py\u001b\\\u001b[2mbenchmark_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=849460;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py#647\u001b\\\u001b[2m647\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct, base_model_arn: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublic \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m Hub/Model/meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct/\u001b[1;36m1.10\u001b[0m.\u001b[1;36m0\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m source_model_package_arn: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -finetuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     SageMaker Python SDK will collect telemetry to help us better  telemetry_logging.py:91\n",
-       "                             understand our user's needs, diagnose issues, and deliver                             \n",
-       "                             additional features.                                                                  \n",
-       "                             To opt out of telemetry, please disable via TelemetryOptOut                           \n",
-       "                             parameter in SDK defaults config. For more information, refer                         \n",
-       "                             to                                                                                    \n",
-       "                             https://sagemaker.readthedocs.io/en/stable/overview.html#confi                        \n",
-       "                             guring-and-using-defaults-with-the-sagemaker-python-sdk.                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=537782;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=387290;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Fetching evaluation override parameters for hyperparameters benchmark_evaluator.py:495\n",
-       "                             property                                                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Fetching evaluation override parameters for hyperparameters \u001b]8;id=706064;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py\u001b\\\u001b[2mbenchmark_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=284205;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py#495\u001b\\\u001b[2m495\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m property \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Fetching hub content metadata for                                  recipe_utils.py:201\n",
-       "                             meta-textgeneration-llama-3-2-1b-instruct from SageMakerPublicHub                     \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Fetching hub content metadata for \u001b]8;id=502448;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=531984;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#201\u001b\\\u001b[2m201\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct from SageMakerPublicHub \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Searching for evaluation recipe with Type='Evaluation' and         recipe_utils.py:221\n",
-       "                             EvaluationType='DeterministicEvaluation'                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for evaluation recipe with \u001b[38;2;215;175;0mType\u001b[0m=\u001b[38;2;0;135;0m'Evaluation'\u001b[0m and \u001b]8;id=67072;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=119115;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#221\u001b\\\u001b[2m221\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;215;175;0mEvaluationType\u001b[0m=\u001b[38;2;0;135;0m'DeterministicEvaluation'\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Downloading override parameters from                               recipe_utils.py:249\n",
-       "                             s3://jumpstart-cache-beta-us-west-2/recipes/open-source-eval-meta-                    \n",
-       "                             textgeneration-llama-3-2-1b-instruct-deterministic_override_params                    \n",
-       "                             _sm_jobs_v1.0.19.json                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Downloading override parameters from \u001b]8;id=954396;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=959350;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#249\u001b\\\u001b[2m249\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/jumpstart-cache-beta-us-west-2/recipes/\u001b[0m\u001b[38;2;225;0;225mopen-source-eval-meta-\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mtextgeneration-llama-3-2-1b-instruct-deterministic_override_params\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225m_sm_jobs_v1.0.19.json\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 13:40:21] INFO     Using configured hyperparameters: {'max_new_tokens':        benchmark_evaluator.py:568\n",
-       "                             '8192', 'temperature': '0', 'top_k': '-1', 'top_p': '1.0',                            \n",
-       "                             'aggregation': '', 'postprocessing': 'False',                                         \n",
-       "                             'max_model_len': '12000'}                                                             \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:40:21]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using configured hyperparameters: \u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b]8;id=584498;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py\u001b\\\u001b[2mbenchmark_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=126531;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py#568\u001b\\\u001b[2m568\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'8192'\u001b[0m, \u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m, \u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[38;2;0;135;0m'-1'\u001b[0m, \u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[38;2;0;135;0m'1.0'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'aggregation'\u001b[0m: \u001b[38;2;0;135;0m''\u001b[0m, \u001b[38;2;0;135;0m'postprocessing'\u001b[0m: \u001b[38;2;0;135;0m'False'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'max_model_len'\u001b[0m: \u001b[38;2;0;135;0m'12000'\u001b[0m\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Using full template for ModelPackage                             base_evaluator.py:655\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using full template for ModelPackage \u001b]8;id=556396;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=773270;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#655\u001b\\\u001b[2m655\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Resolved template parameters: {'role_arn':                       base_evaluator.py:693\n",
-       "                             'arn:aws:iam::052150106756:role/Admin', 'mlflow_resource_arn':                        \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server                      \n",
-       "                             /mmlu-eval-experiment', 'mlflow_experiment_name': None,                               \n",
-       "                             'mlflow_run_name': None, 'model_package_group_arn':                                   \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/ex                      \n",
-       "                             ample-name-aovqo', 'source_model_package_arn':                                        \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin                      \n",
-       "                             etuned-models-gamma/28', 'base_model_arn':                                            \n",
-       "                             'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/                      \n",
-       "                             Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0',                              \n",
-       "                             's3_output_path': 's3://mufi-test-serverless-smtj/eval/',                             \n",
-       "                             'dataset_artifact_arn':                                                               \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b                      \n",
-       "                             3138877d772ec489bef', 'action_arn_prefix':                                            \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:action',                                    \n",
-       "                             'dataset_uri':                                                                        \n",
-       "                             's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19                      \n",
-       "                             5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl', 'task':                        \n",
-       "                             'gen_qa', 'strategy': 'gen_qa', 'evaluation_metric': 'all',                           \n",
-       "                             'subtask': '', 'pipeline_name':                                                       \n",
-       "                             'SagemakerEvaluation-Deterministic', 'evaluate_base_model':                           \n",
-       "                             True, 'max_new_tokens': '8192', 'temperature': '0', 'top_k':                          \n",
-       "                             '-1', 'top_p': '1.0', 'aggregation': '', 'postprocessing':                            \n",
-       "                             'False', 'max_model_len': '12000'}                                                    \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved template parameters: \u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'role_arn'\u001b[0m: \u001b]8;id=970601;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=386360;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#693\u001b\\\u001b[2m693\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:iam::052150106756:role/Admin'\u001b[0m, \u001b[38;2;0;135;0m'mlflow_resource_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m/mmlu-eval-experiment'\u001b[0m, \u001b[38;2;0;135;0m'mlflow_experiment_name'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'mlflow_run_name'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m, \u001b[38;2;0;135;0m'model_package_group_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/ex\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mample-name-aovqo'\u001b[0m, \u001b[38;2;0;135;0m'source_model_package_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28'\u001b[0m, \u001b[38;2;0;135;0m'base_model_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3_output_path'\u001b[0m: \u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'dataset_artifact_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m3138877d772ec489bef'\u001b[0m, \u001b[38;2;0;135;0m'action_arn_prefix'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:action'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'dataset_uri'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl'\u001b[0m, \u001b[38;2;0;135;0m'task'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'gen_qa'\u001b[0m, \u001b[38;2;0;135;0m'strategy'\u001b[0m: \u001b[38;2;0;135;0m'gen_qa'\u001b[0m, \u001b[38;2;0;135;0m'evaluation_metric'\u001b[0m: \u001b[38;2;0;135;0m'all'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'subtask'\u001b[0m: \u001b[38;2;0;135;0m''\u001b[0m, \u001b[38;2;0;135;0m'pipeline_name'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'SagemakerEvaluation-Deterministic'\u001b[0m, \u001b[38;2;0;135;0m'evaluate_base_model'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[3;38;2;0;135;0mTrue\u001b[0m, \u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b[38;2;0;135;0m'8192'\u001b[0m, \u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m, \u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'-1'\u001b[0m, \u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[38;2;0;135;0m'1.0'\u001b[0m, \u001b[38;2;0;135;0m'aggregation'\u001b[0m: \u001b[38;2;0;135;0m''\u001b[0m, \u001b[38;2;0;135;0m'postprocessing'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'False'\u001b[0m, \u001b[38;2;0;135;0m'max_model_len'\u001b[0m: \u001b[38;2;0;135;0m'12000'\u001b[0m\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Rendered pipeline definition:                                    base_evaluator.py:702\n",
-       "                             {                                                                                     \n",
-       "                               \"Version\": \"2020-12-01\",                                                            \n",
-       "                               \"Metadata\": {},                                                                     \n",
-       "                               \"MlflowConfig\": {                                                                   \n",
-       "                                 \"MlflowResourceArn\":                                                              \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server                      \n",
-       "                             /mmlu-eval-experiment\"                                                                \n",
-       "                               },                                                                                  \n",
-       "                               \"Parameters\": [],                                                                   \n",
-       "                               \"Steps\": [                                                                          \n",
-       "                                 {                                                                                 \n",
-       "                                   \"Name\": \"CreateEvaluationAction\",                                               \n",
-       "                                   \"Type\": \"Lineage\",                                                              \n",
-       "                                   \"Arguments\": {                                                                  \n",
-       "                                     \"Actions\": [                                                                  \n",
-       "                                       {                                                                           \n",
-       "                                         \"ActionName\": {                                                           \n",
-       "                                           \"Get\": \"Execution.PipelineExecutionId\"                                  \n",
-       "                                         },                                                                        \n",
-       "                                         \"ActionType\": \"Evaluation\",                                               \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"SourceUri\":                                                            \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin                      \n",
-       "                             etuned-models-gamma/28\",                                                              \n",
-       "                                           \"SourceType\": \"ModelPackage\"                                            \n",
-       "                                         },                                                                        \n",
-       "                                         \"Properties\": {                                                           \n",
-       "                                           \"PipelineExecutionArn\": {                                               \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionArn\"                               \n",
-       "                                           },                                                                      \n",
-       "                                           \"PipelineName\":                                                         \n",
-       "                             \"SagemakerEvaluation-Deterministic\"                                                   \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ],                                                                            \n",
-       "                                     \"Contexts\": [                                                                 \n",
-       "                                       {                                                                           \n",
-       "                                         \"ContextName\": {                                                          \n",
-       "                                           \"Get\": \"Execution.PipelineExecutionId\"                                  \n",
-       "                                         },                                                                        \n",
-       "                                         \"ContextType\": \"PipelineExecution\",                                       \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"SourceUri\": {                                                          \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionArn\"                               \n",
-       "                                           }                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ],                                                                            \n",
-       "                                     \"Associations\": [                                                             \n",
-       "                                       {                                                                           \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"Name\": {                                                               \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionId\"                                \n",
-       "                                           },                                                                      \n",
-       "                                           \"Type\": \"Action\"                                                        \n",
-       "                                         },                                                                        \n",
-       "                                         \"Destination\": {                                                          \n",
-       "                                           \"Name\": {                                                               \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionId\"                                \n",
-       "                                           },                                                                      \n",
-       "                                           \"Type\": \"Context\"                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"AssociationType\": \"ContributedTo\"                                        \n",
-       "                                       },                                                                          \n",
-       "                                       {                                                                           \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"Arn\":                                                                  \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b                      \n",
-       "                             3138877d772ec489bef\"                                                                  \n",
-       "                                         },                                                                        \n",
-       "                                         \"Destination\": {                                                          \n",
-       "                                           \"Arn\": {                                                                \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"/\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 \"arn:aws:sagemaker:us-west-2:052150106756:ac                      \n",
-       "                             tion\",                                                                                \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 }                                                                 \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"AssociationType\": \"ContributedTo\"                                        \n",
-       "                                       }                                                                           \n",
-       "                                     ]                                                                             \n",
-       "                                   }                                                                               \n",
-       "                                 },                                                                                \n",
-       "                                 {                                                                                 \n",
-       "                                   \"Name\": \"EvaluateBaseModel\",                                                    \n",
-       "                                   \"Type\": \"Training\",                                                             \n",
-       "                                   \"Arguments\": {                                                                  \n",
-       "                                     \"RoleArn\": \"arn:aws:iam::052150106756:role/Admin\",                            \n",
-       "                                     \"ModelPackageConfig\": {                                                       \n",
-       "                                       \"ModelPackageGroupArn\":                                                     \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/ex                      \n",
-       "                             ample-name-aovqo\",                                                                    \n",
-       "                                       \"SourceModelPackageArn\":                                                    \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin                      \n",
-       "                             etuned-models-gamma/28\"                                                               \n",
-       "                                     },                                                                            \n",
-       "                                     \"ServerlessJobConfig\": {                                                      \n",
-       "                                       \"BaseModelArn\":                                                             \n",
-       "                             \"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/                      \n",
-       "                             Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\",                              \n",
-       "                                       \"AcceptEula\": true,                                                         \n",
-       "                                       \"JobType\": \"Evaluation\",                                                    \n",
-       "                                       \"EvaluationType\": \"BenchmarkEvaluation\"                                     \n",
-       "                                     },                                                                            \n",
-       "                                     \"StoppingCondition\": {                                                        \n",
-       "                                       \"MaxRuntimeInSeconds\": 86400                                                \n",
-       "                                     },                                                                            \n",
-       "                                     \"HyperParameters\": {                                                          \n",
-       "                                       \"task\": \"gen_qa\",                                                           \n",
-       "                                       \"strategy\": \"gen_qa\",                                                       \n",
-       "                                       \"evaluation_metric\": \"all\",                                                 \n",
-       "                                       \"max_new_tokens\": \"8192\",                                                   \n",
-       "                                       \"temperature\": \"0\",                                                         \n",
-       "                                       \"top_k\": \"-1\",                                                              \n",
-       "                                       \"top_p\": \"1.0\",                                                             \n",
-       "                                       \"max_model_len\": \"12000\",                                                   \n",
-       "                                       \"aggregation\": \"\",                                                          \n",
-       "                                       \"postprocessing\": \"False\"                                                   \n",
-       "                                     },                                                                            \n",
-       "                                     \"OutputDataConfig\": {                                                         \n",
-       "                                       \"S3OutputPath\":                                                             \n",
-       "                             \"s3://mufi-test-serverless-smtj/eval/\",                                               \n",
-       "                                       \"CompressionType\": \"NONE\"                                                   \n",
-       "                                     },                                                                            \n",
-       "                                     \"InputDataConfig\": [                                                          \n",
-       "                                       {                                                                           \n",
-       "                                         \"ChannelName\": \"train\",                                                   \n",
-       "                                         \"DataSource\": {                                                           \n",
-       "                                           \"S3DataSource\": {                                                       \n",
-       "                                             \"S3DataType\": \"S3Prefix\",                                             \n",
-       "                                             \"S3Uri\":                                                              \n",
-       "                             \"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19                      \n",
-       "                             5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\"                                 \n",
-       "                                           }                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ]                                                                             \n",
-       "                                   }                                                                               \n",
-       "                                 },                                                                                \n",
-       "                                 {                                                                                 \n",
-       "                                   \"Name\": \"EvaluateCustomModel\",                                                  \n",
-       "                                   \"Type\": \"Training\",                                                             \n",
-       "                                   \"Arguments\": {                                                                  \n",
-       "                                     \"RoleArn\": \"arn:aws:iam::052150106756:role/Admin\",                            \n",
-       "                                     \"ModelPackageConfig\": {                                                       \n",
-       "                                       \"ModelPackageGroupArn\":                                                     \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/ex                      \n",
-       "                             ample-name-aovqo\",                                                                    \n",
-       "                                       \"SourceModelPackageArn\":                                                    \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin                      \n",
-       "                             etuned-models-gamma/28\"                                                               \n",
-       "                                     },                                                                            \n",
-       "                                     \"ServerlessJobConfig\": {                                                      \n",
-       "                                       \"BaseModelArn\":                                                             \n",
-       "                             \"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/                      \n",
-       "                             Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\",                              \n",
-       "                                       \"AcceptEula\": true,                                                         \n",
-       "                                       \"JobType\": \"Evaluation\",                                                    \n",
-       "                                       \"EvaluationType\": \"BenchmarkEvaluation\"                                     \n",
-       "                                     },                                                                            \n",
-       "                                     \"StoppingCondition\": {                                                        \n",
-       "                                       \"MaxRuntimeInSeconds\": 86400                                                \n",
-       "                                     },                                                                            \n",
-       "                                     \"HyperParameters\": {                                                          \n",
-       "                                       \"task\": \"gen_qa\",                                                           \n",
-       "                                       \"strategy\": \"gen_qa\",                                                       \n",
-       "                                       \"evaluation_metric\": \"all\",                                                 \n",
-       "                                       \"max_new_tokens\": \"8192\",                                                   \n",
-       "                                       \"temperature\": \"0\",                                                         \n",
-       "                                       \"top_k\": \"-1\",                                                              \n",
-       "                                       \"top_p\": \"1.0\",                                                             \n",
-       "                                       \"max_model_len\": \"12000\",                                                   \n",
-       "                                       \"aggregation\": \"\",                                                          \n",
-       "                                       \"postprocessing\": \"False\"                                                   \n",
-       "                                     },                                                                            \n",
-       "                                     \"OutputDataConfig\": {                                                         \n",
-       "                                       \"S3OutputPath\":                                                             \n",
-       "                             \"s3://mufi-test-serverless-smtj/eval/\",                                               \n",
-       "                                       \"CompressionType\": \"NONE\"                                                   \n",
-       "                                     },                                                                            \n",
-       "                                     \"InputDataConfig\": [                                                          \n",
-       "                                       {                                                                           \n",
-       "                                         \"ChannelName\": \"train\",                                                   \n",
-       "                                         \"DataSource\": {                                                           \n",
-       "                                           \"S3DataSource\": {                                                       \n",
-       "                                             \"S3DataType\": \"S3Prefix\",                                             \n",
-       "                                             \"S3Uri\":                                                              \n",
-       "                             \"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19                      \n",
-       "                             5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\"                                 \n",
-       "                                           }                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ]                                                                             \n",
-       "                                   }                                                                               \n",
-       "                                 },                                                                                \n",
-       "                                 {                                                                                 \n",
-       "                                   \"Name\": \"AssociateLineage\",                                                     \n",
-       "                                   \"Type\": \"Lineage\",                                                              \n",
-       "                                   \"DependsOn\": [                                                                  \n",
-       "                                     \"CreateEvaluationAction\"                                                      \n",
-       "                                   ],                                                                              \n",
-       "                                   \"Arguments\": {                                                                  \n",
-       "                                     \"Artifacts\": [                                                                \n",
-       "                                       {                                                                           \n",
-       "                                         \"ArtifactName\": {                                                         \n",
-       "                                           \"Std:Join\": {                                                           \n",
-       "                                             \"On\": \"-\",                                                            \n",
-       "                                             \"Values\": [                                                           \n",
-       "                                               {                                                                   \n",
-       "                                                 \"Get\": \"Execution.PipelineExecutionId\"                            \n",
-       "                                               },                                                                  \n",
-       "                                               \"base-eval-report\"                                                  \n",
-       "                                             ]                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"ArtifactType\": \"EvaluationReport\",                                       \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"SourceUri\": {                                                          \n",
-       "                                             \"Get\":                                                                \n",
-       "                             \"Steps.EvaluateBaseModel.OutputDataConfig.S3OutputPath\"                               \n",
-       "                                           }                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       },                                                                          \n",
-       "                                       {                                                                           \n",
-       "                                         \"ArtifactName\": {                                                         \n",
-       "                                           \"Std:Join\": {                                                           \n",
-       "                                             \"On\": \"-\",                                                            \n",
-       "                                             \"Values\": [                                                           \n",
-       "                                               {                                                                   \n",
-       "                                                 \"Get\": \"Execution.PipelineExecutionId\"                            \n",
-       "                                               },                                                                  \n",
-       "                                               \"custom-eval-report\"                                                \n",
-       "                                             ]                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"ArtifactType\": \"EvaluationReport\",                                       \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"SourceUri\": {                                                          \n",
-       "                                             \"Get\":                                                                \n",
-       "                             \"Steps.EvaluateCustomModel.OutputDataConfig.S3OutputPath\"                             \n",
-       "                                           }                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ],                                                                            \n",
-       "                                     \"Associations\": [                                                             \n",
-       "                                       {                                                                           \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"Name\": {                                                               \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"-\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 },                                                                \n",
-       "                                                 \"base-eval-report\"                                                \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           },                                                                      \n",
-       "                                           \"Type\": \"Artifact\"                                                      \n",
-       "                                         },                                                                        \n",
-       "                                         \"Destination\": {                                                          \n",
-       "                                           \"Arn\": {                                                                \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"/\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 \"arn:aws:sagemaker:us-west-2:052150106756:ac                      \n",
-       "                             tion\",                                                                                \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 }                                                                 \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"AssociationType\": \"ContributedTo\"                                        \n",
-       "                                       },                                                                          \n",
-       "                                       {                                                                           \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"Name\": {                                                               \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"-\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 },                                                                \n",
-       "                                                 \"custom-eval-report\"                                              \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           },                                                                      \n",
-       "                                           \"Type\": \"Artifact\"                                                      \n",
-       "                                         },                                                                        \n",
-       "                                         \"Destination\": {                                                          \n",
-       "                                           \"Arn\": {                                                                \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"/\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 \"arn:aws:sagemaker:us-west-2:052150106756:ac                      \n",
-       "                             tion\",                                                                                \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 }                                                                 \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"AssociationType\": \"ContributedTo\"                                        \n",
-       "                                       }                                                                           \n",
-       "                                     ]                                                                             \n",
-       "                                   }                                                                               \n",
-       "                                 }                                                                                 \n",
-       "                               ]                                                                                   \n",
-       "                             }                                                                                     \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Rendered pipeline definition: \u001b]8;id=330131;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=262009;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#702\u001b\\\u001b[2m702\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Version\"\u001b[0m: \u001b[38;2;0;135;0m\"2020-12-01\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Metadata\"\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MlflowConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MlflowResourceArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m/mmlu-eval-experiment\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Parameters\"\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Lineage\"\u001b[0m, 
\u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Actions\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ActionName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ActionType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceType\"\u001b[0m: \u001b[38;2;0;135;0m\"ModelPackage\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Properties\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"PipelineExecutionArn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionArn\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"PipelineName\"\u001b[0m: 
\u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SagemakerEvaluation-Deterministic\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Contexts\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ContextName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ContextType\"\u001b[0m: \u001b[38;2;0;135;0m\"PipelineExecution\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionArn\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Associations\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m 
\u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Action\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Context\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m3138877d772ec489bef\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluateBaseModel\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Training\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"RoleArn\"\u001b[0m: \u001b[38;2;0;135;0m\"arn:aws:iam::052150106756:role/Admin\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageGroupArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/ex\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mample-name-aovqo\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceModelPackageArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ServerlessJobConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"BaseModelArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AcceptEula\"\u001b[0m: true, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"JobType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, 
\u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluationType\"\u001b[0m: \u001b[38;2;0;135;0m\"BenchmarkEvaluation\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"StoppingCondition\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MaxRuntimeInSeconds\"\u001b[0m: \u001b[1;36m86400\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"HyperParameters\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"task\"\u001b[0m: \u001b[38;2;0;135;0m\"gen_qa\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"strategy\"\u001b[0m: \u001b[38;2;0;135;0m\"gen_qa\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"evaluation_metric\"\u001b[0m: \u001b[38;2;0;135;0m\"all\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_new_tokens\"\u001b[0m: \u001b[38;2;0;135;0m\"8192\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"temperature\"\u001b[0m: \u001b[38;2;0;135;0m\"0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_k\"\u001b[0m: \u001b[38;2;0;135;0m\"-1\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_p\"\u001b[0m: \u001b[38;2;0;135;0m\"1.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_model_len\"\u001b[0m: \u001b[38;2;0;135;0m\"12000\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"aggregation\"\u001b[0m: \u001b[38;2;0;135;0m\"\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"postprocessing\"\u001b[0m: 
\u001b[38;2;0;135;0m\"False\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"OutputDataConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3OutputPath\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CompressionType\"\u001b[0m: \u001b[38;2;0;135;0m\"NONE\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"InputDataConfig\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ChannelName\"\u001b[0m: \u001b[38;2;0;135;0m\"train\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataType\"\u001b[0m: \u001b[38;2;0;135;0m\"S3Prefix\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3Uri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluateCustomModel\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Training\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"RoleArn\"\u001b[0m: \u001b[38;2;0;135;0m\"arn:aws:iam::052150106756:role/Admin\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageGroupArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/ex\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mample-name-aovqo\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceModelPackageArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ServerlessJobConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"BaseModelArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AcceptEula\"\u001b[0m: true, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"JobType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluationType\"\u001b[0m: \u001b[38;2;0;135;0m\"BenchmarkEvaluation\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"StoppingCondition\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MaxRuntimeInSeconds\"\u001b[0m: \u001b[1;36m86400\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"HyperParameters\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"task\"\u001b[0m: \u001b[38;2;0;135;0m\"gen_qa\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"strategy\"\u001b[0m: \u001b[38;2;0;135;0m\"gen_qa\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"evaluation_metric\"\u001b[0m: \u001b[38;2;0;135;0m\"all\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_new_tokens\"\u001b[0m: \u001b[38;2;0;135;0m\"8192\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"temperature\"\u001b[0m: \u001b[38;2;0;135;0m\"0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_k\"\u001b[0m: \u001b[38;2;0;135;0m\"-1\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"top_p\"\u001b[0m: \u001b[38;2;0;135;0m\"1.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_model_len\"\u001b[0m: \u001b[38;2;0;135;0m\"12000\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"aggregation\"\u001b[0m: \u001b[38;2;0;135;0m\"\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"postprocessing\"\u001b[0m: \u001b[38;2;0;135;0m\"False\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"OutputDataConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3OutputPath\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CompressionType\"\u001b[0m: \u001b[38;2;0;135;0m\"NONE\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"InputDataConfig\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ChannelName\"\u001b[0m: \u001b[38;2;0;135;0m\"train\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataType\"\u001b[0m: \u001b[38;2;0;135;0m\"S3Prefix\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3Uri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"AssociateLineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Lineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DependsOn\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Artifacts\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"base-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactType\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluationReport\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateBaseModel.OutputDataConfig.S3OutputPath\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: 
\u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactType\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluationReport\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateCustomModel.OutputDataConfig.S3OutputPath\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Associations\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"base-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Artifact\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m 
\u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Artifact\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Found existing pipeline:                                              execution.py:199\n",
-       "                             SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b2                 \n",
-       "                             9171c42                                                                               \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found existing pipeline: \u001b]8;id=588942;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=925025;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#199\u001b\\\u001b[2m199\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-BenchmarkEvaluation-\u001b[93mc344c91d-6f62-4907-85cc-7e6b2\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m9171c42\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Updating pipeline                                                     execution.py:202\n",
-       "                             SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b2                 \n",
-       "                             9171c42 with latest definition                                                        \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Updating pipeline \u001b]8;id=746487;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=234699;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#202\u001b\\\u001b[2m202\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-BenchmarkEvaluation-\u001b[93mc344c91d-6f62-4907-85cc-7e6b2\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m9171c42\u001b[0m with latest definition \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Updating pipeline resource.                                         resources.py:30306\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Updating pipeline resource. \u001b]8;id=908194;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=233215;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py#30306\u001b\\\u001b[2m30306\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 13:40:22] INFO     Successfully updated pipeline:                                        execution.py:208\n",
-       "                             SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b2                 \n",
-       "                             9171c42                                                                               \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:40:22]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Successfully updated pipeline: \u001b]8;id=321336;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=381496;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#208\u001b\\\u001b[2m208\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-BenchmarkEvaluation-\u001b[93mc344c91d-6f62-4907-85cc-7e6b2\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m9171c42\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Starting pipeline execution: gen-qa-eval-demo-1764452422              execution.py:263\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Starting pipeline execution: gen-qa-eval-demo-\u001b[1;36m1764452422\u001b[0m \u001b]8;id=359442;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=958972;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#263\u001b\\\u001b[2m263\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Pipeline execution started:                                           execution.py:274\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation                 \n",
-       "                             -BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/9                 \n",
-       "                             5qr3e96dblb                                                                           \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Pipeline execution started: \u001b]8;id=73999;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=223527;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#274\u001b\\\u001b[2m274\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -BenchmarkEvaluation-\u001b[93mc344c91d-6f62-4907-85cc-7e6b29171c42\u001b[0m/execution/9 \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 5qr3e96dblb \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
BenchmarkEvaluationExecution(\n",
-       "arn='arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/95qr3e96dblb',\n",
-       "name='gen-qa-eval-demo',\n",
-       "status=PipelineExecutionStatus(overall_status='Executing', step_details=[], failure_reason=None),\n",
-       "last_modified_time=datetime.datetime(2025, 11, 29, 13, 40, 22, 284000, tzinfo=tzlocal()),\n",
-       "eval_type=<EvalType.BENCHMARK: 'benchmark'>,\n",
-       "s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n",
-       "steps=[]\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mBenchmarkEvaluationExecution\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/95qr3e96dblb'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'gen-qa-eval-demo'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m, \u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mlast_modified_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m29\u001b[0m, \u001b[1;36m13\u001b[0m, \u001b[1;36m40\u001b[0m, \u001b[1;36m22\u001b[0m, \u001b[1;36m284000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0meval_type\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225mEvalType.BENCHMARK:\u001b[0m\u001b[39m \u001b[0m\u001b[38;2;0;135;0m'benchmark'\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m=\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msteps\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "\n", - "Pipeline Execution ARN: arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/95qr3e96dblb\n", - "Initial Status: Executing\n" - ] - } - ], + "outputs": [], "source": [ "# Run evaluation with configured parameters\n", "execution = evaluator.evaluate()\n", @@ -1739,92 +231,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
PipelineExecutionStatus(\n",
-       "overall_status='Executing',\n",
-       "step_details=[\n",
-       "│   │   StepDetail(\n",
-       "│   │   │   name='EvaluateCustomModel',\n",
-       "│   │   │   status='Executing',\n",
-       "│   │   │   start_time='2025-11-29T13:26:38.084000-08:00',\n",
-       "│   │   │   end_time='<sagemaker.core.utils.utils.Unassigned object at 0x120de0b60>',\n",
-       "│   │   │   display_name=None,\n",
-       "│   │   │   failure_reason=None\n",
-       "│   │   ),\n",
-       "│   │   StepDetail(\n",
-       "│   │   │   name='EvaluateBaseModel',\n",
-       "│   │   │   status='Executing',\n",
-       "│   │   │   start_time='2025-11-29T13:26:38.083000-08:00',\n",
-       "│   │   │   end_time='<sagemaker.core.utils.utils.Unassigned object at 0x120de0b60>',\n",
-       "│   │   │   display_name=None,\n",
-       "│   │   │   failure_reason=None\n",
-       "│   │   ),\n",
-       "│   │   StepDetail(\n",
-       "│   │   │   name='CreateEvaluationAction',\n",
-       "│   │   │   status='Succeeded',\n",
-       "│   │   │   start_time='2025-11-29T13:26:38.083000-08:00',\n",
-       "│   │   │   end_time='2025-11-29T13:26:42.759000-08:00',\n",
-       "│   │   │   display_name=None,\n",
-       "│   │   │   failure_reason=None\n",
-       "│   │   )\n",
-       "],\n",
-       "failure_reason=None\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'EvaluateCustomModel'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T13:26:38.084000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'\u001b[0m\u001b[1;38;2;0;135;0m<\u001b[0m\u001b[1;38;2;0;135;0msagemaker.core.utils.utils.Unassigned\u001b[0m\u001b[38;2;0;135;0m object at 0x120de0b60>'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1;39m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'EvaluateBaseModel'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'Executing'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ 
\u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'2025-11-29T13:26:38.083000-08:00'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'\u001b[0m\u001b[38;2;0;135;0m'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'CreateEvaluationAction'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T13:26:38.083000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T13:26:42.759000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Step Details:\n", - " EvaluateCustomModel: Executing\n", - " EvaluateBaseModel: Executing\n", - " CreateEvaluationAction: Succeeded\n" - ] - } - ], + "outputs": [], 
"source": [ "# Refresh status\n", "execution.refresh()\n", @@ -1850,66 +259,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
╭─────────────────────────────────────────── Pipeline Execution Status ───────────────────────────────────────────╮\n",
-       "  Overall Status        Succeeded                                                                                \n",
-       "  Target Status         Succeeded                                                                                \n",
-       "  Elapsed Time          0.5s                                                                                     \n",
-       "                                                                                                                 \n",
-       " Pipeline Steps                                                                                                  \n",
-       "  Step Name                       Status           Duration                                                      \n",
-       "  AssociateLineage                Succeeded        3.3s                                                          \n",
-       "  EvaluateCustomModel             Succeeded        3714.0s                                                       \n",
-       "  EvaluateBaseModel               Succeeded        5366.2s                                                       \n",
-       "  CreateEvaluationAction          Succeeded        2.7s                                                          \n",
-       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[34m╭─\u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34mPipeline Execution Status\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mOverall Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;37mSucceeded\u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mTarget Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;37mSucceeded\u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mElapsed Time \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[37m0.5s \u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35mPipeline Steps\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mStep Name \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35mStatus \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35mDuration \u001b[0m\u001b[1;35m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mAssociateLineage \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m3.3s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mEvaluateCustomModel \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m3714.0s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mEvaluateBaseModel \u001b[0m\u001b[36m \u001b[0m\u001b[33m 
\u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m5366.2s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mCreateEvaluationAction \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m2.7s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:21:20] INFO     Final Resource Status: Succeeded                                      execution.py:979\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:21:20]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Final Resource Status: Succeeded \u001b]8;id=401306;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=749;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#979\u001b\\\u001b[2m979\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Final Status: Succeeded\n" - ] - } - ], + "outputs": [], "source": [ "# Wait for job completion with progress updates\n", "# This will show a rich progress display in Jupyter\n", @@ -1956,343 +308,9 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
's3://mufi-test-serverless-smtj/eval/'\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:21:25] INFO     S3 bucket: mufi-test-serverless-smtj, prefix: eval           show_results_utils.py:130\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:21:25]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m S3 bucket: mufi-test-serverless-smtj, prefix: eval \u001b]8;id=671086;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=908024;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#130\u001b\\\u001b[2m130\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Extracted training job name:                                  show_results_utils.py:63\n",
-       "                             pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7 from                            \n",
-       "                             step: EvaluateCustomModel                                                             \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted training job name: \u001b]8;id=813615;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=57499;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#63\u001b\\\u001b[2m63\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7 from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m step: EvaluateCustomModel \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:21:26] INFO     Extracted training job name:                                  show_results_utils.py:63\n",
-       "                             pipelines-95qr3e96dblb-EvaluateBaseModel-VA9YzcdIVI from                              \n",
-       "                             step: EvaluateBaseModel                                                               \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:21:26]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted training job name: \u001b]8;id=745707;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=953308;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#63\u001b\\\u001b[2m63\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-95qr3e96dblb-EvaluateBaseModel-VA9YzcdIVI from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m step: EvaluateBaseModel \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Searching for results_*.json in                              show_results_utils.py:150\n",
-       "                             s3://mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-E                          \n",
-       "                             valuateCustomModel-F51y8F3Pg7/output/output/                                          \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for results_*.json in \u001b]8;id=805603;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=739949;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-E\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mvaluateCustomModel-F51y8F3Pg7/output/output/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Found results file:                                          show_results_utils.py:168\n",
-       "                             eval/pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7/o                          \n",
-       "                             utput/output/eval-meta_textgeneration_llama_3_2_1b_instruct-                          \n",
-       "                             -or8pa/eval_results/results_2025-11-29T22-41-53.186048+00-00                          \n",
-       "                             .json                                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found results file: \u001b]8;id=188825;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=667854;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#168\u001b\\\u001b[2m168\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m eval/pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7/o \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m utput/output/eval-meta_textgeneration_llama_3_2_1b_instruct- \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -or8pa/eval_results/results_2025-\u001b[1;36m11\u001b[0m-29T22-\u001b[1;36m41\u001b[0m-\u001b[1;36m53.186048\u001b[0m+\u001b[1;36m00-00\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1;36m.j\u001b[0mson \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Searching for results_*.json in                              show_results_utils.py:150\n",
-       "                             s3://mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-E                          \n",
-       "                             valuateBaseModel-VA9YzcdIVI/output/output/                                            \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for results_*.json in \u001b]8;id=270113;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=844454;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-E\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mvaluateBaseModel-VA9YzcdIVI/output/output/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Found results file:                                          show_results_utils.py:168\n",
-       "                             eval/pipelines-95qr3e96dblb-EvaluateBaseModel-VA9YzcdIVI/out                          \n",
-       "                             put/output/eval-meta_textgeneration_llama_3_2_1b_instruct--o                          \n",
-       "                             r8pa/eval_results/results_2025-11-29T23-09-21.277725+00-00.j                          \n",
-       "                             son                                                                                   \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found results file: \u001b]8;id=221667;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=736866;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#168\u001b\\\u001b[2m168\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m eval/pipelines-95qr3e96dblb-EvaluateBaseModel-VA9YzcdIVI/out \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m put/output/eval-meta_textgeneration_llama_3_2_1b_instruct--o \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m r8pa/eval_results/results_2025-\u001b[1;36m11\u001b[0m-29T23-\u001b[1;36m09\u001b[0m-\u001b[1;36m21.277725\u001b[0m+\u001b[1;36m00-00.j\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m son \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Using metrics from 'all' key (standard benchmark format)      show_results_utils.py:93\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using metrics from \u001b[38;2;0;135;0m'all'\u001b[0m key \u001b[1m(\u001b[0mstandard benchmark format\u001b[1m)\u001b[0m \u001b]8;id=431825;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=75452;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#93\u001b\\\u001b[2m93\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Using metrics from 'all' key (standard benchmark format)      show_results_utils.py:93\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using metrics from \u001b[38;2;0;135;0m'all'\u001b[0m key \u001b[1m(\u001b[0mstandard benchmark format\u001b[1m)\u001b[0m \u001b]8;id=866976;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=697222;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#93\u001b\\\u001b[2m93\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                Custom Model Results                \n",
-       "╭────────────────────────────────┬─────────────────╮\n",
-       "│ Metric                                    Value │\n",
-       "├────────────────────────────────┼─────────────────┤\n",
-       "│ bleu                                     6.6928 │\n",
-       "│ bleu_stderr                              0.7801 │\n",
-       "│ em                                        1.23% │\n",
-       "│ em_stderr                                0.0018 │\n",
-       "│ f1                                       19.04% │\n",
-       "│ f1_score_quasi                           25.25% │\n",
-       "│ f1_score_quasi_stderr                    0.0049 │\n",
-       "│ f1_stderr                                0.0047 │\n",
-       "│ qem                                       2.16% │\n",
-       "│ qem_stderr                               0.0024 │\n",
-       "│ rouge1                                   25.69% │\n",
-       "│ rouge1_stderr                            0.0047 │\n",
-       "│ rouge2                                   19.09% │\n",
-       "│ rouge2_stderr                            0.0047 │\n",
-       "│ rougeL                                   25.02% │\n",
-       "│ rougeL_stderr                            0.0047 │\n",
-       "╰────────────────────────────────┴─────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3;32mCustom Model Results\u001b[0m\u001b[3m \u001b[0m\n", - "╭────────────────────────────────┬─────────────────╮\n", - "│\u001b[1;32m \u001b[0m\u001b[1;32mMetric \u001b[0m\u001b[1;32m \u001b[0m│\u001b[1;32m \u001b[0m\u001b[1;32m Value\u001b[0m\u001b[1;32m \u001b[0m│\n", - "├────────────────────────────────┼─────────────────┤\n", - "│\u001b[36m \u001b[0m\u001b[36mbleu \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 6.6928\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mbleu_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.7801\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mem \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 1.23%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mem_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0018\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 19.04%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_score_quasi \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.25%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_score_quasi_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0049\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mqem \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 2.16%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mqem_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0024\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge1 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.69%\u001b[0m\u001b[37m \u001b[0m│\n", - 
"│\u001b[36m \u001b[0m\u001b[36mrouge1_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge2 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 19.09%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge2_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrougeL \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.02%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrougeL_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "╰────────────────────────────────┴─────────────────╯\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                 Base Model Results                 \n",
-       "╭────────────────────────────────┬─────────────────╮\n",
-       "│ Metric                                    Value │\n",
-       "├────────────────────────────────┼─────────────────┤\n",
-       "│ bleu                                     6.6928 │\n",
-       "│ bleu_stderr                              0.7803 │\n",
-       "│ em                                        1.29% │\n",
-       "│ em_stderr                                0.0019 │\n",
-       "│ f1                                       19.09% │\n",
-       "│ f1_score_quasi                           25.22% │\n",
-       "│ f1_score_quasi_stderr                    0.0049 │\n",
-       "│ f1_stderr                                0.0047 │\n",
-       "│ qem                                       2.18% │\n",
-       "│ qem_stderr                               0.0024 │\n",
-       "│ rouge1                                   25.61% │\n",
-       "│ rouge1_stderr                            0.0047 │\n",
-       "│ rouge2                                   19.04% │\n",
-       "│ rouge2_stderr                            0.0047 │\n",
-       "│ rougeL                                   24.95% │\n",
-       "│ rougeL_stderr                            0.0047 │\n",
-       "╰────────────────────────────────┴─────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3;33mBase Model Results\u001b[0m\u001b[3m \u001b[0m\n", - "╭────────────────────────────────┬─────────────────╮\n", - "│\u001b[1;33m \u001b[0m\u001b[1;33mMetric \u001b[0m\u001b[1;33m \u001b[0m│\u001b[1;33m \u001b[0m\u001b[1;33m Value\u001b[0m\u001b[1;33m \u001b[0m│\n", - "├────────────────────────────────┼─────────────────┤\n", - "│\u001b[36m \u001b[0m\u001b[36mbleu \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 6.6928\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mbleu_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.7803\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mem \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 1.29%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mem_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0019\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 19.09%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_score_quasi \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.22%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_score_quasi_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0049\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mqem \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 2.18%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mqem_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0024\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge1 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.61%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m 
\u001b[0m\u001b[36mrouge1_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge2 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 19.04%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge2_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrougeL \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 24.95%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrougeL_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "╰────────────────────────────────┴─────────────────╯\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
╭─────────────────────────────────────────── Result Artifacts Location ───────────────────────────────────────────╮\n",
-       "                                                                                                                 \n",
-       "                                                                                                                 \n",
-       "  📦 Full evaluation artifacts available at:                                                                     \n",
-       "                                                                                                                 \n",
-       "  Custom Model:                                                                                                  \n",
-       "    s3://mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7/output/output/Non  \n",
-       "  e/eval_results/                                                                                                \n",
-       "                                                                                                                 \n",
-       "  Base Model:                                                                                                    \n",
-       "    s3://mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-EvaluateBaseModel-VA9YzcdIVI/output/output/None/  \n",
-       "  eval_results/                                                                                                  \n",
-       "                                                                                                                 \n",
-       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[34m╭─\u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34mResult Artifacts Location\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m📦 \u001b[0m\u001b[1mFull evaluation artifacts available at:\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;32mCustom Model:\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m s3://mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7/output/output/Non\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36me/eval_results/\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;33mBase Model:\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m s3://mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-EvaluateBaseModel-VA9YzcdIVI/output/output/None/\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36meval_results/\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "pprint(execution.s3_output_path)\n", "# Display results in a formatted table\n", @@ -2312,197 +330,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:35:47] INFO     Extracted s3_output_path from training job                            execution.py:367\n",
-       "                             pipelines-inlsexrd7jes-EvaluateCustomModel-NuPrIoRW4Q:                                \n",
-       "                             s3://mufi-test-serverless-smtj/eval/                                                  \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:35:47]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted s3_output_path from training job \u001b]8;id=148252;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=588100;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#367\u001b\\\u001b[2m367\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-inlsexrd7jes-EvaluateCustomModel-NuPrIoRW4Q: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
BenchmarkEvaluationExecution(\n",
-       "arn='arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/inlsexrd7jes',\n",
-       "name='inlsexrd7jes',\n",
-       "status=PipelineExecutionStatus(\n",
-       "│   │   overall_status='Executing',\n",
-       "│   │   step_details=[\n",
-       "│   │   │   StepDetail(\n",
-       "│   │   │   │   name='EvaluateCustomModel',\n",
-       "│   │   │   │   status='Executing',\n",
-       "│   │   │   │   start_time='2025-11-29T13:26:38.084000-08:00',\n",
-       "│   │   │   │   end_time='<sagemaker.core.utils.utils.Unassigned object at 0x120de0b60>',\n",
-       "│   │   │   │   display_name=None,\n",
-       "│   │   │   │   failure_reason=None\n",
-       "│   │   │   ),\n",
-       "│   │   │   StepDetail(\n",
-       "│   │   │   │   name='EvaluateBaseModel',\n",
-       "│   │   │   │   status='Executing',\n",
-       "│   │   │   │   start_time='2025-11-29T13:26:38.083000-08:00',\n",
-       "│   │   │   │   end_time='<sagemaker.core.utils.utils.Unassigned object at 0x120de0b60>',\n",
-       "│   │   │   │   display_name=None,\n",
-       "│   │   │   │   failure_reason=None\n",
-       "│   │   │   ),\n",
-       "│   │   │   StepDetail(\n",
-       "│   │   │   │   name='CreateEvaluationAction',\n",
-       "│   │   │   │   status='Succeeded',\n",
-       "│   │   │   │   start_time='2025-11-29T13:26:38.083000-08:00',\n",
-       "│   │   │   │   end_time='2025-11-29T13:26:42.759000-08:00',\n",
-       "│   │   │   │   display_name=None,\n",
-       "│   │   │   │   failure_reason=None\n",
-       "│   │   │   )\n",
-       "│   │   ],\n",
-       "│   │   failure_reason=None\n",
-       "),\n",
-       "last_modified_time=datetime.datetime(2025, 11, 29, 13, 26, 37, 300000, tzinfo=tzlocal()),\n",
-       "eval_type=<EvalType.BENCHMARK: 'benchmark'>,\n",
-       "s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n",
-       "steps=[]\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mBenchmarkEvaluationExecution\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/inlsexrd7jes'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'inlsexrd7jes'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'EvaluateCustomModel'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T13:26:38.084000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'\u001b[0m\u001b[1;38;2;0;135;0m<\u001b[0m\u001b[1;38;2;0;135;0msagemaker.core.utils.utils.Unassigned\u001b[0m\u001b[38;2;0;135;0m object at 0x120de0b60>'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ 
\u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1;39m(\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'EvaluateBaseModel'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'Executing'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'2025-11-29T13:26:38.083000-08:00'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m''\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1;39m(\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'CreateEvaluationAction'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'Succeeded'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'2025-11-29T13:26:38.083000-08:00'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'2025-11-29T13:26:42.759000-08:00'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ 
\u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1;39m)\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;39m]\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mlast_modified_time\u001b[0m\u001b[39m=\u001b[0m\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;36m2025\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m11\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m29\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m13\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m26\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m37\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m300000\u001b[0m\u001b[39m, \u001b[0m\u001b[38;2;215;175;0mtzinfo\u001b[0m\u001b[39m=\u001b[0m\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0meval_type\u001b[0m\u001b[39m=\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m=\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msteps\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Status: Executing\n" - ] - }, - { - "data": { - "text/html": [ - "
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n",
-       " in <module>:22                                                                                   \n",
-       "                                                                                                  \n",
-       "   19 pprint(existing_execution)                                                                  \n",
-       "   20 print(f\"\\nStatus: {existing_execution.status.overall_status}\")                              \n",
-       "   21                                                                                             \n",
-       " 22 existing_execution.show_results()                                                           \n",
-       "   23                                                                                             \n",
-       "                                                                                                  \n",
-       " /Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/tele \n",
-       " metry_logging.py:175 in wrapper                                                                  \n",
-       "                                                                                                  \n",
-       "   172 │   │   │   │   │   \"sagemaker_session is not provided or not valid.\",                     \n",
-       "   173 │   │   │   │   │   func_name,                                                             \n",
-       "   174 │   │   │   │   )                                                                          \n",
-       " 175 │   │   │   │   return func(*args, **kwargs)                                               \n",
-       "   176 │   │                                                                                      \n",
-       "   177 │   │   return wrapper                                                                     \n",
-       "   178                                                                                            \n",
-       "                                                                                                  \n",
-       " /Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/exe \n",
-       " cution.py:1223 in show_results                                                                   \n",
-       "                                                                                                  \n",
-       "   1220 │   │   self.refresh()                                                                    \n",
-       "   1221 │   │                                                                                     \n",
-       "   1222 │   │   if self.status.overall_status != \"Succeeded\":                                     \n",
-       " 1223 │   │   │   raise ValueError(                                                             \n",
-       "   1224 │   │   │   │   f\"Cannot show results. Execution status is '{self.status.overall_status}  \n",
-       "   1225 │   │   │   │   f\"Results are only available after successful execution. \"                \n",
-       "   1226 │   │   │   │   f\"Use execution.wait() to wait for completion or check execution.status   \n",
-       "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-       "ValueError: Cannot show results. Execution status is 'Executing'. Results are only available after successful \n",
-       "execution. Use execution.wait() to wait for completion or check execution.status for details.\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[38;2;255;0;0m╭─\u001b[0m\u001b[38;2;255;0;0m──────────────────────────────\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[1;38;2;255;0;0mTraceback \u001b[0m\u001b[1;2;38;2;255;0;0m(most recent call last)\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[38;2;255;0;0m───────────────────────────────\u001b[0m\u001b[38;2;255;0;0m─╮\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m in \u001b[92m\u001b[0m:\u001b[94m22\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m19 \u001b[0mpprint(existing_execution) \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m20 \u001b[0m\u001b[96mprint\u001b[0m(\u001b[33mf\u001b[0m\u001b[33m\"\u001b[0m\u001b[33m\\n\u001b[0m\u001b[33mStatus: \u001b[0m\u001b[33m{\u001b[0mexisting_execution.status.overall_status\u001b[33m}\u001b[0m\u001b[33m\"\u001b[0m) \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m21 \u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[31m❱ \u001b[0m22 \u001b[1;4mexisting_execution.show_results()\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m23 \u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2;33m/Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/\u001b[0m\u001b[1;33mtele\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[1;33mmetry_logging.py\u001b[0m:\u001b[94m175\u001b[0m in \u001b[92mwrapper\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m172 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m\u001b[33m\"\u001b[0m\u001b[33msagemaker_session is not provided or not 
valid.\u001b[0m\u001b[33m\"\u001b[0m, \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m173 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0mfunc_name, \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m174 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m) \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[31m❱ \u001b[0m175 \u001b[2m│ │ │ │ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[1;4mfunc(*args, **kwargs)\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m176 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m177 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m wrapper \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m178 \u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2;33m/Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/\u001b[0m\u001b[1;33mexe\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[1;33mcution.py\u001b[0m:\u001b[94m1223\u001b[0m in \u001b[92mshow_results\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m1220 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[96mself\u001b[0m.refresh() \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m1221 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m1222 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.status.overall_status != \u001b[33m\"\u001b[0m\u001b[33mSucceeded\u001b[0m\u001b[33m\"\u001b[0m: \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[31m❱ \u001b[0m1223 \u001b[2m│ │ │ 
\u001b[0m\u001b[1;4;94mraise\u001b[0m\u001b[1;4m \u001b[0m\u001b[1;4;96mValueError\u001b[0m\u001b[1;4m(\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m1224 \u001b[0m\u001b[1;2;4m│ │ │ │ \u001b[0m\u001b[1;4;33mf\u001b[0m\u001b[1;4;33m\"\u001b[0m\u001b[1;4;33mCannot show results. Execution status is \u001b[0m\u001b[1;4;33m'\u001b[0m\u001b[1;4;33m{\u001b[0m\u001b[1;4;96mself\u001b[0m\u001b[1;4m.status.overall_status\u001b[0m\u001b[1;4;33m}\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m1225 \u001b[0m\u001b[1;2;4m│ │ │ │ \u001b[0m\u001b[1;4;33mf\u001b[0m\u001b[1;4;33m\"\u001b[0m\u001b[1;4;33mResults are only available after successful execution. \u001b[0m\u001b[1;4;33m\"\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m1226 \u001b[0m\u001b[1;2;4m│ │ │ │ \u001b[0m\u001b[1;4;33mf\u001b[0m\u001b[1;4;33m\"\u001b[0m\u001b[1;4;33mUse execution.wait() to wait for completion or check execution.status \u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n", - "\u001b[1;91mValueError: \u001b[0mCannot show results. Execution status is \u001b[38;2;0;135;0m'Executing'\u001b[0m. Results are only available after successful \n", - "execution. 
Use \u001b[1;38;2;225;0;225mexecution.wait\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m to wait for completion or check execution.status for details.\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "from sagemaker.train.evaluate import EvaluationPipelineExecution\n", "from rich.pretty import pprint\n", @@ -2529,84 +357,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO - sagemaker.modules.evaluate.benchmark_evaluator - Getting or creating artifact for source: arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28\n", - "INFO - sagemaker.modules.evaluate.base_evaluator - Searching for existing artifact for model package: arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28\n", - "INFO - sagemaker.modules.evaluate.base_evaluator - Found existing artifact: arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3138877d772ec489bef\n", - "INFO - sagemaker.modules.evaluate.benchmark_evaluator - Resolved model info - base_model_name: meta-textgeneration-llama-3-2-1b-instruct, base_model_arn: arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0, source_model_package_arn: arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28\n", - "INFO - sagemaker.modules.evaluate.benchmark_evaluator - Using configured hyperparameters: {'max_new_tokens': '8192', 'temperature': '0', 'top_k': '-1', 'top_p': '1.0', 'aggregation': '', 'postprocessing': 'False', 'max_model_len': '12000'}\n", - "INFO - sagemaker.modules.evaluate.benchmark_evaluator - Using DETERMINISTIC_TEMPLATE for ModelPackage\n", - "INFO - sagemaker.modules.evaluate.benchmark_evaluator - Resolved template parameters: {'role_arn': 'arn:aws:iam::052150106756:role/Admin', 'mlflow_resource_arn': 
'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment', 'mlflow_experiment_name': None, 'mlflow_run_name': None, 'model_package_group_arn': 'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/example-name-aovqo', 'source_model_package_arn': 'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28', 'base_model_arn': 'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0', 'task': 'gen_qa', 'strategy': 'gen_qa', 'evaluation_metric': 'all', 's3_output_path': 's3://mufi-test-serverless-smtj/eval/', 'dataset_uri': 's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl', 'subtask': '', 'pipeline_name': 'SagemakerEvaluation-Deterministic', 'dataset_artifact_arn': 'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3138877d772ec489bef', 'action_arn_prefix': 'arn:aws:sagemaker:us-west-2:052150106756:action', 'evaluate_base_model': True, 'max_new_tokens': '8192', 'temperature': '0', 'top_k': '-1', 'top_p': '1.0', 'aggregation': '', 'postprocessing': 'False', 'max_model_len': '12000'}\n", - "INFO - sagemaker.modules.evaluate.execution - Found existing pipeline: SagemakerEvaluation-benchmark\n", - "INFO - sagemaker.modules.evaluate.execution - Updating pipeline SagemakerEvaluation-benchmark with latest definition for benchmark\n" - ] - }, - { - "data": { - "text/html": [ - "
[11/22/25 12:24:36] INFO     Updating pipeline resource.                                         resources.py:30485\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/22/25 12:24:36]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Updating pipeline resource. \u001b]8;id=707103;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=260368;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/resources.py#30485\u001b\\\u001b[2m30485\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO - sagemaker_core.main.resources - Updating pipeline resource.\n", - "INFO - sagemaker.modules.evaluate.execution - Successfully updated pipeline: SagemakerEvaluation-benchmark\n", - "INFO - sagemaker.modules.evaluate.execution - Starting pipeline execution: gen-qa-eval-demo-1763843077\n", - "INFO - sagemaker.modules.evaluate.execution - Pipeline execution started: arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-benchmark/execution/gv93gtwgr7w8\n" - ] - }, - { - "data": { - "text/html": [ - "
BenchmarkEvaluationExecution(\n",
-       "arn='arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-benchmark/execution/gv93gtwgr7w8',\n",
-       "name='gen-qa-eval-demo',\n",
-       "status=PipelineExecutionStatus(overall_status='Executing', step_details=[], failure_reason=None),\n",
-       "last_modified_time=datetime.datetime(2025, 11, 22, 12, 24, 37, 828000, tzinfo=tzlocal()),\n",
-       "eval_type=<EvalType.BENCHMARK: 'benchmark'>,\n",
-       "s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n",
-       "steps=[]\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mBenchmarkEvaluationExecution\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-benchmark/execution/gv93gtwgr7w8'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'gen-qa-eval-demo'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m, \u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mlast_modified_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m22\u001b[0m, \u001b[1;36m12\u001b[0m, \u001b[1;36m24\u001b[0m, \u001b[1;36m37\u001b[0m, \u001b[1;36m828000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0meval_type\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225mEvalType.BENCHMARK:\u001b[0m\u001b[39m \u001b[0m\u001b[38;2;0;135;0m'benchmark'\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m=\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msteps\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Pipeline 
Execution ARN: arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-benchmark/execution/gv93gtwgr7w8\n", - "Initial Status: Executing\n" - ] - } - ], + "outputs": [], "source": [ "# Run evaluation with configured parameters\n", "execution = evaluator.evaluate()\n", @@ -2629,52 +380,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:41:19] INFO     Extracted s3_output_path from training job                            execution.py:367\n",
-       "                             pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7:                                \n",
-       "                             s3://mufi-test-serverless-smtj/eval/                                                  \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:41:19]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted s3_output_path from training job \u001b]8;id=166943;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=816278;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#367\u001b\\\u001b[2m367\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Extracted s3_output_path from training job                            execution.py:367\n",
-       "                             pipelines-inlsexrd7jes-EvaluateCustomModel-NuPrIoRW4Q:                                \n",
-       "                             s3://mufi-test-serverless-smtj/eval/                                                  \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted s3_output_path from training job \u001b]8;id=521868;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=351282;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#367\u001b\\\u001b[2m367\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-inlsexrd7jes-EvaluateCustomModel-NuPrIoRW4Q: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found 2 evaluation(s)\n", - "\n", - " 95qr3e96dblb: Executing\n", - " inlsexrd7jes: Executing\n" - ] - } - ], + "outputs": [], "source": [ "# Get all benchmark evaluations (returns iterator)\n", "all_executions_iter = BenchMarkEvaluator.get_all(region=\"us-west-2\")\n", @@ -2698,66 +404,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/shapes.py:2350: UserWarning: Field name \"schema\" in \"AutoMLSnowflakeDatasetDefinition\" shadows an attribute in parent \"Base\"\n", - " class AutoMLSnowflakeDatasetDefinition(Base):\n", - "/Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/shapes.py:6372: UserWarning: Field name \"schema\" in \"SnowflakeDatasetDefinition\" shadows an attribute in parent \"Base\"\n", - " class SnowflakeDatasetDefinition(Base):\n" - ] - }, - { - "name": "stdout", - "output_type": 
"stream", - "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /Users/mufi/Library/Application Support/sagemaker/config.yaml\n" - ] - }, - { - "data": { - "text/html": [ - "
[11/22/25 18:32:01] WARNING  No boto3 session provided. Creating a new session.                        utils.py:339\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/22/25 18:32:01]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m No boto3 session provided. Creating a new session. \u001b]8;id=549422;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=573139;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/utils.py#339\u001b\\\u001b[2m339\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  No config provided. Using default config.                                 utils.py:347\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m No config provided. Using default config. \u001b]8;id=278829;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=978800;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/utils.py#347\u001b\\\u001b[2m347\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Succeeded\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "AWS service error when stopping pipeline execution: Pipeline execution with ARN arn:aws:sagemaker:us-west-2:052150106756:pipeline/sagemakerevaluation-benchmark/execution/7rr30o7c2qfb status 'Succeeded'. Only pipelines with 'Executing' status can be stopped.\n" - ] - } - ], + "outputs": [], "source": [ "# Uncomment to stop the job\n", "# existing_execution.stop()\n", diff --git a/sagemaker-train/example_notebooks/evaluate/custom_scorer_demo.ipynb b/sagemaker-train/example_notebooks/evaluate/custom_scorer_demo.ipynb index 6cf049cb79..c980e42705 100644 --- a/sagemaker-train/example_notebooks/evaluate/custom_scorer_demo.ipynb +++ b/sagemaker-train/example_notebooks/evaluate/custom_scorer_demo.ipynb @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -48,19 +48,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Configuration:\n", - " Evaluator: arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-test/0.0.1\n", - " Dataset: 
s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\n", - " Base Model: arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28\n", - " Output Location: s3://mufi-test-serverless-smtj/eval/\n" - ] - } - ], + "outputs": [], "source": [ "# Evaluator ARN (custom evaluator from AI Registry)\n", "# evaluator_arn = \"arn:aws:sagemaker:us-west-2:052150106756:hub-content/AIRegistry/JsonDoc/00-goga-qa-evaluation/1.0.0\"\n", @@ -103,99 +91,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:42:33] INFO     Found credentials in shared credentials file: ~/.aws/credentials   credentials.py:1364\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:42:33]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=639873;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=963387;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/botocore/credentials.py#1364\u001b\\\u001b[2m1364\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /Users/mufi/Library/Application Support/sagemaker/config.yaml\n" - ] - }, - { - "data": { - "text/html": [ - "
                    INFO     Resolved MLflow resource ARN:                                    base_evaluator.py:113\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/                      \n",
-       "                             mmlu-eval-experiment                                                                  \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved MLflow resource ARN: \u001b]8;id=342593;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=318918;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#113\u001b\\\u001b[2m113\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/ \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m mmlu-eval-experiment \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "✓ CustomScorerEvaluator created successfully\n" - ] - }, - { - "data": { - "text/html": [ - "
CustomScorerEvaluator(\n",
-       "region=None,\n",
-       "sagemaker_session=<sagemaker.core.helper.session_helper.Session object at 0x116ae9f40>,\n",
-       "model='arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28',\n",
-       "base_eval_name='eval-meta-1b49b716',\n",
-       "s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n",
-       "mlflow_resource_arn='arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment',\n",
-       "mlflow_experiment_name=None,\n",
-       "mlflow_run_name=None,\n",
-       "networking=None,\n",
-       "kms_key_id=None,\n",
-       "model_package_group=None,\n",
-       "evaluator='arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-test/0.0.1',\n",
-       "dataset='s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl',\n",
-       "evaluate_base_model=False\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mCustomScorerEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mregion\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msagemaker_session\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225msagemaker.core.helper.session_helper.Session\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x116ae9f40\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbase_eval_name\u001b[0m=\u001b[38;2;0;135;0m'eval-meta-1b49b716'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m=\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_resource_arn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_experiment_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_run_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mnetworking\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mkms_key_id\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel_package_group\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mevaluator\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-test/0.0.1'\u001b[0m,\n", - "\u001b[2;32m│ 
\u001b[0m\u001b[38;2;215;175;0mdataset\u001b[0m=\u001b[38;2;0;135;0m's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mevaluate_base_model\u001b[0m=\u001b[3;38;2;215;0;0mFalse\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Create evaluator with custom evaluator ARN\n", "evaluator = CustomScorerEvaluator(\n", @@ -221,156 +119,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:42:38] INFO     SageMaker Python SDK will collect telemetry to help us better  telemetry_logging.py:91\n",
-       "                             understand our user's needs, diagnose issues, and deliver                             \n",
-       "                             additional features.                                                                  \n",
-       "                             To opt out of telemetry, please disable via TelemetryOptOut                           \n",
-       "                             parameter in SDK defaults config. For more information, refer                         \n",
-       "                             to                                                                                    \n",
-       "                             https://sagemaker.readthedocs.io/en/stable/overview.html#confi                        \n",
-       "                             guring-and-using-defaults-with-the-sagemaker-python-sdk.                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:42:38]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=848286;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=998219;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Fetching evaluation override parameters for             custom_scorer_evaluator.py:236\n",
-       "                             hyperparameters property                                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Fetching evaluation override parameters for \u001b]8;id=20210;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py\u001b\\\u001b[2mcustom_scorer_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=113368;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py#236\u001b\\\u001b[2m236\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m hyperparameters property \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Fetching hub content metadata for                                  recipe_utils.py:201\n",
-       "                             meta-textgeneration-llama-3-2-1b-instruct from SageMakerPublicHub                     \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Fetching hub content metadata for \u001b]8;id=402391;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=385188;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#201\u001b\\\u001b[2m201\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct from SageMakerPublicHub \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  No region provided. Using default region.                                 utils.py:340\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m No region provided. Using default region. \u001b]8;id=442028;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=947914;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py#340\u001b\\\u001b[2m340\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Runs on sagemaker us-west-2, region:us-west-2                             utils.py:354\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Runs on sagemaker us-west-\u001b[1;36m2\u001b[0m, region:us-west-\u001b[1;36m2\u001b[0m \u001b]8;id=708289;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=968385;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py#354\u001b\\\u001b[2m354\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Searching for evaluation recipe with Type='Evaluation' and         recipe_utils.py:221\n",
-       "                             EvaluationType='DeterministicEvaluation'                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for evaluation recipe with \u001b[38;2;215;175;0mType\u001b[0m=\u001b[38;2;0;135;0m'Evaluation'\u001b[0m and \u001b]8;id=711157;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=750371;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#221\u001b\\\u001b[2m221\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;215;175;0mEvaluationType\u001b[0m=\u001b[38;2;0;135;0m'DeterministicEvaluation'\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Downloading override parameters from                               recipe_utils.py:249\n",
-       "                             s3://jumpstart-cache-beta-us-west-2/recipes/open-source-eval-meta-                    \n",
-       "                             textgeneration-llama-3-2-1b-instruct-deterministic_override_params                    \n",
-       "                             _sm_jobs_v1.0.19.json                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Downloading override parameters from \u001b]8;id=762518;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=755839;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#249\u001b\\\u001b[2m249\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/jumpstart-cache-beta-us-west-2/recipes/\u001b[0m\u001b[38;2;225;0;225mopen-source-eval-meta-\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mtextgeneration-llama-3-2-1b-instruct-deterministic_override_params\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225m_sm_jobs_v1.0.19.json\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
{\n",
-       "'max_new_tokens': '8192',\n",
-       "'temperature': '0',\n",
-       "'top_k': '-1',\n",
-       "'top_p': '1.0',\n",
-       "'aggregation': '',\n",
-       "'postprocessing': 'False',\n",
-       "'max_model_len': '12000'\n",
-       "}\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b[38;2;0;135;0m'8192'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[38;2;0;135;0m'-1'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[38;2;0;135;0m'1.0'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'aggregation'\u001b[0m: \u001b[38;2;0;135;0m''\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'postprocessing'\u001b[0m: \u001b[38;2;0;135;0m'False'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'max_model_len'\u001b[0m: \u001b[38;2;0;135;0m'12000'\u001b[0m\n", - "\u001b[1m}\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "pprint(evaluator.hyperparameters.to_dict())\n", "\n", @@ -392,7 +143,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -423,916 +174,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:42:43] INFO     SageMaker Python SDK will collect telemetry to help us better  telemetry_logging.py:91\n",
-       "                             understand our user's needs, diagnose issues, and deliver                             \n",
-       "                             additional features.                                                                  \n",
-       "                             To opt out of telemetry, please disable via TelemetryOptOut                           \n",
-       "                             parameter in SDK defaults config. For more information, refer                         \n",
-       "                             to                                                                                    \n",
-       "                             https://sagemaker.readthedocs.io/en/stable/overview.html#confi                        \n",
-       "                             guring-and-using-defaults-with-the-sagemaker-python-sdk.                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:42:43]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=201476;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=125527;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Getting or creating artifact for source:                         base_evaluator.py:597\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine                      \n",
-       "                             tuned-models-gamma/28                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Getting or creating artifact for source: \u001b]8;id=336129;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=429516;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#597\u001b\\\u001b[2m597\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Searching for existing artifact for model package:               base_evaluator.py:459\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine                      \n",
-       "                             tuned-models-gamma/28                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for existing artifact for model package: \u001b]8;id=916341;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=92767;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#459\u001b\\\u001b[2m459\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Found existing artifact:                                         base_evaluator.py:468\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3                      \n",
-       "                             138877d772ec489bef                                                                    \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found existing artifact: \u001b]8;id=110957;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=865654;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#468\u001b\\\u001b[2m468\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3 \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 138877d772ec489bef \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Inferred model package group ARN:                                base_evaluator.py:386\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes                      \n",
-       "                             t-finetuned-models-gamma from                                                         \n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine                      \n",
-       "                             tuned-models-gamma/28                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Inferred model package group ARN: \u001b]8;id=126121;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=198580;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#386\u001b\\\u001b[2m386\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m t-finetuned-models-gamma from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Automatically inferred model_package_group:                      base_evaluator.py:421\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes                      \n",
-       "                             t-finetuned-models-gamma                                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Automatically inferred model_package_group: \u001b]8;id=183930;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=417470;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#421\u001b\\\u001b[2m421\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m t-finetuned-models-gamma \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Using ModelPackage - model_package_group_arn:           custom_scorer_evaluator.py:421\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package-                               \n",
-       "                             group/test-finetuned-models-gamma                                                     \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using ModelPackage - model_package_group_arn: \u001b]8;id=191140;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py\u001b\\\u001b[2mcustom_scorer_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=51752;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py#421\u001b\\\u001b[2m421\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package- \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m group/test-finetuned-models-gamma \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Resolved model info - base_model_name:                  custom_scorer_evaluator.py:424\n",
-       "                             meta-textgeneration-llama-3-2-1b-instruct,                                            \n",
-       "                             base_model_arn:                                                                       \n",
-       "                             arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPu                               \n",
-       "                             blicHub/Model/meta-textgeneration-llama-3-2-1b-instruct                               \n",
-       "                             /1.10.0, source_model_package_arn:                                                    \n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package/                               \n",
-       "                             test-finetuned-models-gamma/28                                                        \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved model info - base_model_name: \u001b]8;id=359160;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py\u001b\\\u001b[2mcustom_scorer_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=935533;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py#424\u001b\\\u001b[2m424\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m base_model_arn: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPu \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m blicHub/Model/meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m /\u001b[1;36m1.10\u001b[0m.\u001b[1;36m0\u001b[0m, source_model_package_arn: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/ \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m test-finetuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     SageMaker Python SDK will collect telemetry to help us better  telemetry_logging.py:91\n",
-       "                             understand our user's needs, diagnose issues, and deliver                             \n",
-       "                             additional features.                                                                  \n",
-       "                             To opt out of telemetry, please disable via TelemetryOptOut                           \n",
-       "                             parameter in SDK defaults config. For more information, refer                         \n",
-       "                             to                                                                                    \n",
-       "                             https://sagemaker.readthedocs.io/en/stable/overview.html#confi                        \n",
-       "                             guring-and-using-defaults-with-the-sagemaker-python-sdk.                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=189431;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=22751;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Using configured hyperparameters: {'max_new_tokens':    custom_scorer_evaluator.py:299\n",
-       "                             '8192', 'temperature': '0', 'top_k': '-1', 'top_p':                                   \n",
-       "                             '1.0', 'aggregation': '', 'postprocessing': 'False',                                  \n",
-       "                             'max_model_len': '12000'}                                                             \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using configured hyperparameters: \u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b]8;id=536279;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py\u001b\\\u001b[2mcustom_scorer_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=194605;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py#299\u001b\\\u001b[2m299\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'8192'\u001b[0m, \u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m, \u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[38;2;0;135;0m'-1'\u001b[0m, \u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'1.0'\u001b[0m, \u001b[38;2;0;135;0m'aggregation'\u001b[0m: \u001b[38;2;0;135;0m''\u001b[0m, \u001b[38;2;0;135;0m'postprocessing'\u001b[0m: \u001b[38;2;0;135;0m'False'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'max_model_len'\u001b[0m: \u001b[38;2;0;135;0m'12000'\u001b[0m\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Using full template for ModelPackage                             base_evaluator.py:655\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using full template for ModelPackage \u001b]8;id=164880;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=880373;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#655\u001b\\\u001b[2m655\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 13:42:44] INFO     Resolved template parameters: {'role_arn':                       base_evaluator.py:693\n",
-       "                             'arn:aws:iam::052150106756:role/Admin', 'mlflow_resource_arn':                        \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server                      \n",
-       "                             /mmlu-eval-experiment', 'mlflow_experiment_name': None,                               \n",
-       "                             'mlflow_run_name': None, 'model_package_group_arn':                                   \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te                      \n",
-       "                             st-finetuned-models-gamma', 'source_model_package_arn':                               \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin                      \n",
-       "                             etuned-models-gamma/28', 'base_model_arn':                                            \n",
-       "                             'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/                      \n",
-       "                             Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0',                              \n",
-       "                             's3_output_path': 's3://mufi-test-serverless-smtj/eval/',                             \n",
-       "                             'dataset_artifact_arn':                                                               \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b                      \n",
-       "                             3138877d772ec489bef', 'action_arn_prefix':                                            \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:action',                                    \n",
-       "                             'dataset_uri':                                                                        \n",
-       "                             's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19                      \n",
-       "                             5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl', 'task':                        \n",
-       "                             'gen_qa', 'strategy': 'gen_qa', 'evaluation_metric': 'all',                           \n",
-       "                             'pipeline_name': 'SagemakerEvaluation-Deterministic',                                 \n",
-       "                             'evaluate_base_model': False, 'evaluator_arn':                                        \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKW                      \n",
-       "                             PZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-t                      \n",
-       "                             est/0.0.1', 'max_new_tokens': '8192', 'temperature': '0',                             \n",
-       "                             'top_k': '-1', 'top_p': '1.0', 'aggregation': 'mean',                                 \n",
-       "                             'postprocessing': 'True', 'max_model_len': '12000'}                                   \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:42:44]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved template parameters: \u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'role_arn'\u001b[0m: \u001b]8;id=863350;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=151185;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#693\u001b\\\u001b[2m693\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:iam::052150106756:role/Admin'\u001b[0m, \u001b[38;2;0;135;0m'mlflow_resource_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m/mmlu-eval-experiment'\u001b[0m, \u001b[38;2;0;135;0m'mlflow_experiment_name'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'mlflow_run_name'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m, \u001b[38;2;0;135;0m'model_package_group_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mst-finetuned-models-gamma'\u001b[0m, \u001b[38;2;0;135;0m'source_model_package_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28'\u001b[0m, \u001b[38;2;0;135;0m'base_model_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3_output_path'\u001b[0m: \u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'dataset_artifact_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m3138877d772ec489bef'\u001b[0m, \u001b[38;2;0;135;0m'action_arn_prefix'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:action'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'dataset_uri'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl'\u001b[0m, \u001b[38;2;0;135;0m'task'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'gen_qa'\u001b[0m, \u001b[38;2;0;135;0m'strategy'\u001b[0m: \u001b[38;2;0;135;0m'gen_qa'\u001b[0m, \u001b[38;2;0;135;0m'evaluation_metric'\u001b[0m: \u001b[38;2;0;135;0m'all'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'pipeline_name'\u001b[0m: \u001b[38;2;0;135;0m'SagemakerEvaluation-Deterministic'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'evaluate_base_model'\u001b[0m: \u001b[3;38;2;215;0;0mFalse\u001b[0m, \u001b[38;2;0;135;0m'evaluator_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKW\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-t\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mest/0.0.1'\u001b[0m, \u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b[38;2;0;135;0m'8192'\u001b[0m, \u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[38;2;0;135;0m'-1'\u001b[0m, \u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[38;2;0;135;0m'1.0'\u001b[0m, \u001b[38;2;0;135;0m'aggregation'\u001b[0m: \u001b[38;2;0;135;0m'mean'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'postprocessing'\u001b[0m: \u001b[38;2;0;135;0m'True'\u001b[0m, \u001b[38;2;0;135;0m'max_model_len'\u001b[0m: \u001b[38;2;0;135;0m'12000'\u001b[0m\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Rendered pipeline definition:                                    base_evaluator.py:702\n",
-       "                             {                                                                                     \n",
-       "                               \"Version\": \"2020-12-01\",                                                            \n",
-       "                               \"Metadata\": {},                                                                     \n",
-       "                               \"MlflowConfig\": {                                                                   \n",
-       "                                 \"MlflowResourceArn\":                                                              \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server                      \n",
-       "                             /mmlu-eval-experiment\"                                                                \n",
-       "                               },                                                                                  \n",
-       "                               \"Parameters\": [],                                                                   \n",
-       "                               \"Steps\": [                                                                          \n",
-       "                                 {                                                                                 \n",
-       "                                   \"Name\": \"CreateEvaluationAction\",                                               \n",
-       "                                   \"Type\": \"Lineage\",                                                              \n",
-       "                                   \"Arguments\": {                                                                  \n",
-       "                                     \"Actions\": [                                                                  \n",
-       "                                       {                                                                           \n",
-       "                                         \"ActionName\": {                                                           \n",
-       "                                           \"Get\": \"Execution.PipelineExecutionId\"                                  \n",
-       "                                         },                                                                        \n",
-       "                                         \"ActionType\": \"Evaluation\",                                               \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"SourceUri\":                                                            \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin                      \n",
-       "                             etuned-models-gamma/28\",                                                              \n",
-       "                                           \"SourceType\": \"ModelPackage\"                                            \n",
-       "                                         },                                                                        \n",
-       "                                         \"Properties\": {                                                           \n",
-       "                                           \"PipelineExecutionArn\": {                                               \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionArn\"                               \n",
-       "                                           },                                                                      \n",
-       "                                           \"PipelineName\":                                                         \n",
-       "                             \"SagemakerEvaluation-Deterministic\"                                                   \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ],                                                                            \n",
-       "                                     \"Contexts\": [                                                                 \n",
-       "                                       {                                                                           \n",
-       "                                         \"ContextName\": {                                                          \n",
-       "                                           \"Get\": \"Execution.PipelineExecutionId\"                                  \n",
-       "                                         },                                                                        \n",
-       "                                         \"ContextType\": \"PipelineExecution\",                                       \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"SourceUri\": {                                                          \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionArn\"                               \n",
-       "                                           }                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ],                                                                            \n",
-       "                                     \"Associations\": [                                                             \n",
-       "                                       {                                                                           \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"Name\": {                                                               \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionId\"                                \n",
-       "                                           },                                                                      \n",
-       "                                           \"Type\": \"Action\"                                                        \n",
-       "                                         },                                                                        \n",
-       "                                         \"Destination\": {                                                          \n",
-       "                                           \"Name\": {                                                               \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionId\"                                \n",
-       "                                           },                                                                      \n",
-       "                                           \"Type\": \"Context\"                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"AssociationType\": \"ContributedTo\"                                        \n",
-       "                                       },                                                                          \n",
-       "                                       {                                                                           \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"Arn\":                                                                  \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b                      \n",
-       "                             3138877d772ec489bef\"                                                                  \n",
-       "                                         },                                                                        \n",
-       "                                         \"Destination\": {                                                          \n",
-       "                                           \"Arn\": {                                                                \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"/\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 \"arn:aws:sagemaker:us-west-2:052150106756:ac                      \n",
-       "                             tion\",                                                                                \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 }                                                                 \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"AssociationType\": \"ContributedTo\"                                        \n",
-       "                                       }                                                                           \n",
-       "                                     ]                                                                             \n",
-       "                                   }                                                                               \n",
-       "                                 },                                                                                \n",
-       "                                 {                                                                                 \n",
-       "                                   \"Name\": \"EvaluateCustomModel\",                                                  \n",
-       "                                   \"Type\": \"Training\",                                                             \n",
-       "                                   \"Arguments\": {                                                                  \n",
-       "                                     \"RoleArn\": \"arn:aws:iam::052150106756:role/Admin\",                            \n",
-       "                                     \"ModelPackageConfig\": {                                                       \n",
-       "                                       \"ModelPackageGroupArn\":                                                     \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te                      \n",
-       "                             st-finetuned-models-gamma\",                                                           \n",
-       "                                       \"SourceModelPackageArn\":                                                    \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin                      \n",
-       "                             etuned-models-gamma/28\"                                                               \n",
-       "                                     },                                                                            \n",
-       "                                     \"ServerlessJobConfig\": {                                                      \n",
-       "                                       \"BaseModelArn\":                                                             \n",
-       "                             \"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/                      \n",
-       "                             Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\",                              \n",
-       "                                       \"AcceptEula\": true,                                                         \n",
-       "                                       \"JobType\": \"Evaluation\",                                                    \n",
-       "                                       \"EvaluationType\": \"CustomScorerEvaluation\",                                 \n",
-       "                                       \"EvaluatorArn\":                                                             \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKW                      \n",
-       "                             PZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-t                      \n",
-       "                             est/0.0.1\"                                                                            \n",
-       "                                     },                                                                            \n",
-       "                                     \"StoppingCondition\": {                                                        \n",
-       "                                       \"MaxRuntimeInSeconds\": 86400                                                \n",
-       "                                     },                                                                            \n",
-       "                                     \"HyperParameters\": {                                                          \n",
-       "                                       \"task\": \"gen_qa\",                                                           \n",
-       "                                       \"strategy\": \"gen_qa\",                                                       \n",
-       "                                       \"evaluation_metric\": \"all\",                                                 \n",
-       "                                       \"max_new_tokens\": \"8192\",                                                   \n",
-       "                                       \"temperature\": \"0\",                                                         \n",
-       "                                       \"top_k\": \"-1\",                                                              \n",
-       "                                       \"top_p\": \"1.0\",                                                             \n",
-       "                                       \"max_model_len\": \"12000\",                                                   \n",
-       "                                       \"aggregation\": \"mean\",                                                      \n",
-       "                                       \"postprocessing\": \"True\"                                                    \n",
-       "                                     },                                                                            \n",
-       "                                     \"OutputDataConfig\": {                                                         \n",
-       "                                       \"S3OutputPath\":                                                             \n",
-       "                             \"s3://mufi-test-serverless-smtj/eval/\",                                               \n",
-       "                                       \"CompressionType\": \"NONE\"                                                   \n",
-       "                                     },                                                                            \n",
-       "                                     \"InputDataConfig\": [                                                          \n",
-       "                                       {                                                                           \n",
-       "                                         \"ChannelName\": \"train\",                                                   \n",
-       "                                         \"DataSource\": {                                                           \n",
-       "                                           \"S3DataSource\": {                                                       \n",
-       "                                             \"S3DataType\": \"S3Prefix\",                                             \n",
-       "                                             \"S3Uri\":                                                              \n",
-       "                             \"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19                      \n",
-       "                             5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\"                                 \n",
-       "                                           }                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ]                                                                             \n",
-       "                                   }                                                                               \n",
-       "                                 },                                                                                \n",
-       "                                 {                                                                                 \n",
-       "                                   \"Name\": \"AssociateLineage\",                                                     \n",
-       "                                   \"Type\": \"Lineage\",                                                              \n",
-       "                                   \"DependsOn\": [                                                                  \n",
-       "                                     \"CreateEvaluationAction\"                                                      \n",
-       "                                   ],                                                                              \n",
-       "                                   \"Arguments\": {                                                                  \n",
-       "                                     \"Artifacts\": [                                                                \n",
-       "                                       {                                                                           \n",
-       "                                         \"ArtifactName\": {                                                         \n",
-       "                                           \"Std:Join\": {                                                           \n",
-       "                                             \"On\": \"-\",                                                            \n",
-       "                                             \"Values\": [                                                           \n",
-       "                                               {                                                                   \n",
-       "                                                 \"Get\": \"Execution.PipelineExecutionId\"                            \n",
-       "                                               },                                                                  \n",
-       "                                               \"custom-eval-report\"                                                \n",
-       "                                             ]                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"ArtifactType\": \"EvaluationReport\",                                       \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"SourceUri\": {                                                          \n",
-       "                                             \"Get\":                                                                \n",
-       "                             \"Steps.EvaluateCustomModel.OutputDataConfig.S3OutputPath\"                             \n",
-       "                                           }                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ],                                                                            \n",
-       "                                     \"Associations\": [                                                             \n",
-       "                                       {                                                                           \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"Name\": {                                                               \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"-\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 },                                                                \n",
-       "                                                 \"custom-eval-report\"                                              \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           },                                                                      \n",
-       "                                           \"Type\": \"Artifact\"                                                      \n",
-       "                                         },                                                                        \n",
-       "                                         \"Destination\": {                                                          \n",
-       "                                           \"Arn\": {                                                                \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"/\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 \"arn:aws:sagemaker:us-west-2:052150106756:ac                      \n",
-       "                             tion\",                                                                                \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 }                                                                 \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"AssociationType\": \"ContributedTo\"                                        \n",
-       "                                       }                                                                           \n",
-       "                                     ]                                                                             \n",
-       "                                   }                                                                               \n",
-       "                                 }                                                                                 \n",
-       "                               ]                                                                                   \n",
-       "                             }                                                                                     \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Rendered pipeline definition: \u001b]8;id=395506;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=123517;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#702\u001b\\\u001b[2m702\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Version\"\u001b[0m: \u001b[38;2;0;135;0m\"2020-12-01\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Metadata\"\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MlflowConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MlflowResourceArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m/mmlu-eval-experiment\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Parameters\"\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Lineage\"\u001b[0m, 
\u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Actions\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ActionName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ActionType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceType\"\u001b[0m: \u001b[38;2;0;135;0m\"ModelPackage\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Properties\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"PipelineExecutionArn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionArn\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"PipelineName\"\u001b[0m: 
\u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SagemakerEvaluation-Deterministic\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Contexts\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ContextName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ContextType\"\u001b[0m: \u001b[38;2;0;135;0m\"PipelineExecution\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionArn\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Associations\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m 
\u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Action\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Context\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m3138877d772ec489bef\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluateCustomModel\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Training\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"RoleArn\"\u001b[0m: \u001b[38;2;0;135;0m\"arn:aws:iam::052150106756:role/Admin\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageGroupArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mst-finetuned-models-gamma\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceModelPackageArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ServerlessJobConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"BaseModelArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AcceptEula\"\u001b[0m: true, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"JobType\"\u001b[0m: 
\u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluationType\"\u001b[0m: \u001b[38;2;0;135;0m\"CustomScorerEvaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluatorArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKW\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-t\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mest/0.0.1\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"StoppingCondition\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MaxRuntimeInSeconds\"\u001b[0m: \u001b[1;36m86400\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"HyperParameters\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"task\"\u001b[0m: \u001b[38;2;0;135;0m\"gen_qa\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"strategy\"\u001b[0m: \u001b[38;2;0;135;0m\"gen_qa\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"evaluation_metric\"\u001b[0m: \u001b[38;2;0;135;0m\"all\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_new_tokens\"\u001b[0m: \u001b[38;2;0;135;0m\"8192\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"temperature\"\u001b[0m: \u001b[38;2;0;135;0m\"0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_k\"\u001b[0m: \u001b[38;2;0;135;0m\"-1\"\u001b[0m, 
\u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_p\"\u001b[0m: \u001b[38;2;0;135;0m\"1.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_model_len\"\u001b[0m: \u001b[38;2;0;135;0m\"12000\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"aggregation\"\u001b[0m: \u001b[38;2;0;135;0m\"mean\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"postprocessing\"\u001b[0m: \u001b[38;2;0;135;0m\"True\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"OutputDataConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3OutputPath\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CompressionType\"\u001b[0m: \u001b[38;2;0;135;0m\"NONE\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"InputDataConfig\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ChannelName\"\u001b[0m: \u001b[38;2;0;135;0m\"train\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataType\"\u001b[0m: \u001b[38;2;0;135;0m\"S3Prefix\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3Uri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"AssociateLineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Lineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DependsOn\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Artifacts\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactType\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluationReport\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateCustomModel.OutputDataConfig.S3OutputPath\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Associations\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m 
\u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Artifact\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - 
"\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     No existing pipeline found with prefix                                execution.py:212\n",
-       "                             SagemakerEvaluation-CustomScorerEvaluation, creating new one                          \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m No existing pipeline found with prefix \u001b]8;id=437465;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=501901;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#212\u001b\\\u001b[2m212\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-CustomScorerEvaluation, creating new one \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Creating new pipeline:                                                 execution.py:57\n",
-       "                             SagemakerEvaluation-CustomScorerEvaluation-1c2e4a67-ecb4-4c89-8e82-e82                \n",
-       "                             3cbe579c3                                                                             \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Creating new pipeline: \u001b]8;id=91501;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=923226;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#57\u001b\\\u001b[2m57\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-CustomScorerEvaluation-\u001b[93m1c2e4a67-ecb4-4c89-8e82-e82\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m3cbe579c3\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Creating pipeline resource.                                         resources.py:30147\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Creating pipeline resource. \u001b]8;id=877192;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=410393;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py#30147\u001b\\\u001b[2m30147\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Successfully created pipeline:                                         execution.py:76\n",
-       "                             SagemakerEvaluation-CustomScorerEvaluation-1c2e4a67-ecb4-4c89-8e82-e82                \n",
-       "                             3cbe579c3                                                                             \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Successfully created pipeline: \u001b]8;id=802515;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=256656;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#76\u001b\\\u001b[2m76\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-CustomScorerEvaluation-\u001b[93m1c2e4a67-ecb4-4c89-8e82-e82\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m3cbe579c3\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Waiting for pipeline                                                   execution.py:79\n",
-       "                             SagemakerEvaluation-CustomScorerEvaluation-1c2e4a67-ecb4-4c89-8e82-e82                \n",
-       "                             3cbe579c3 to be ready...                                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Waiting for pipeline \u001b]8;id=984002;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=40351;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#79\u001b\\\u001b[2m79\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-CustomScorerEvaluation-\u001b[93m1c2e4a67-ecb4-4c89-8e82-e82\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m3cbe579c3\u001b[0m to be ready\u001b[33m...\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
/Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/rich/live.py:231: UserWarning: \n",
-       "install \"ipywidgets\" for Jupyter support\n",
-       "  warnings.warn('install \"ipywidgets\" for Jupyter support')\n",
-       "
\n" - ], - "text/plain": [ - "/Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/rich/live.py:231: UserWarning: \n", - "install \"ipywidgets\" for Jupyter support\n", - " warnings.warn('install \"ipywidgets\" for Jupyter support')\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Final Resource Status: Active                                       resources.py:30410\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Final Resource Status: \u001b[1mActive\u001b[0m \u001b]8;id=750224;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=46929;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py#30410\u001b\\\u001b[2m30410\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
                    INFO     Pipeline                                                               execution.py:82\n",
-       "                             SagemakerEvaluation-CustomScorerEvaluation-1c2e4a67-ecb4-4c89-8e82-e82                \n",
-       "                             3cbe579c3 is now active and ready for execution                                       \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Pipeline \u001b]8;id=674167;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=265281;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#82\u001b\\\u001b[2m82\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-CustomScorerEvaluation-\u001b[93m1c2e4a67-ecb4-4c89-8e82-e82\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m3cbe579c3\u001b[0m is now active and ready for execution \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Starting pipeline execution: eval-meta-1b49b716-1764452564            execution.py:263\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Starting pipeline execution: eval-meta-1b49b716-\u001b[1;36m1764452564\u001b[0m \u001b]8;id=27465;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=541837;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#263\u001b\\\u001b[2m263\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 13:42:45] INFO     Pipeline execution started:                                           execution.py:274\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation                 \n",
-       "                             -CustomScorerEvaluation-1c2e4a67-ecb4-4c89-8e82-e823cbe579c3/executio                 \n",
-       "                             n/u2q2dl1w5aiq                                                                        \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:42:45]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Pipeline execution started: \u001b]8;id=368377;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=144012;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#274\u001b\\\u001b[2m274\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -CustomScorerEvaluation-\u001b[93m1c2e4a67-ecb4-4c89-8e82-e823cbe579c3\u001b[0m/executio \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m n/u2q2dl1w5aiq \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "✓ Evaluation execution started successfully!\n", - " Execution Name: eval-meta-1b49b716\n", - " Pipeline Execution ARN: arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-CustomScorerEvaluation-1c2e4a67-ecb4-4c89-8e82-e823cbe579c3/execution/u2q2dl1w5aiq\n", - " Status: Executing\n" - ] - } - ], + "outputs": [], "source": [ "# Start evaluation\n", "execution = evaluator.evaluate()\n", @@ -1354,72 +198,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Current Status: Executing\n" - ] - }, - { - "data": { - "text/html": [ - "
PipelineExecutionStatus(\n",
-       "overall_status='Executing',\n",
-       "step_details=[\n",
-       "│   │   StepDetail(\n",
-       "│   │   │   name='EvaluateCustomModel',\n",
-       "│   │   │   status='Executing',\n",
-       "│   │   │   start_time='2025-11-29T13:42:45.523000-08:00',\n",
-       "│   │   │   end_time='<sagemaker.core.utils.utils.Unassigned object at 0x120ab8f80>',\n",
-       "│   │   │   display_name=None,\n",
-       "│   │   │   failure_reason=None\n",
-       "│   │   ),\n",
-       "│   │   StepDetail(\n",
-       "│   │   │   name='CreateEvaluationAction',\n",
-       "│   │   │   status='Succeeded',\n",
-       "│   │   │   start_time='2025-11-29T13:42:45.523000-08:00',\n",
-       "│   │   │   end_time='2025-11-29T13:42:48.017000-08:00',\n",
-       "│   │   │   display_name=None,\n",
-       "│   │   │   failure_reason=None\n",
-       "│   │   )\n",
-       "],\n",
-       "failure_reason=None\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'EvaluateCustomModel'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T13:42:45.523000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'\u001b[0m\u001b[1;38;2;0;135;0m<\u001b[0m\u001b[1;38;2;0;135;0msagemaker.core.utils.utils.Unassigned\u001b[0m\u001b[38;2;0;135;0m object at 0x120ab8f80\u001b[0m\u001b[1;38;2;0;135;0m>\u001b[0m\u001b[38;2;0;135;0m'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'CreateEvaluationAction'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T13:42:45.523000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ 
\u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T13:42:48.017000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Check current status\n", "execution.refresh()\n", @@ -1439,64 +220,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
╭─────────────────────────────────────────── Pipeline Execution Status ───────────────────────────────────────────╮\n",
-       "  Overall Status        Succeeded                                                                                \n",
-       "  Target Status         Succeeded                                                                                \n",
-       "  Elapsed Time          0.9s                                                                                     \n",
-       "                                                                                                                 \n",
-       " Pipeline Steps                                                                                                  \n",
-       "  Step Name                       Status           Duration                                                      \n",
-       "  AssociateLineage                Succeeded        1.9s                                                          \n",
-       "  EvaluateCustomModel             Succeeded        7462.5s                                                       \n",
-       "  CreateEvaluationAction          Succeeded        2.5s                                                          \n",
-       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[34m╭─\u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34mPipeline Execution Status\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mOverall Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;37mSucceeded\u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mTarget Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;37mSucceeded\u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mElapsed Time \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[37m0.9s \u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35mPipeline Steps\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mStep Name \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35mStatus \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35mDuration \u001b[0m\u001b[1;35m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mAssociateLineage \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m1.9s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mEvaluateCustomModel \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m7462.5s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mCreateEvaluationAction \u001b[0m\u001b[36m \u001b[0m\u001b[33m 
\u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m2.5s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:21:36] INFO     Final Resource Status: Succeeded                                      execution.py:979\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:21:36]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Final Resource Status: Succeeded \u001b]8;id=693225;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=873243;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#979\u001b\\\u001b[2m979\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Final Status: Succeeded\n" - ] - } - ], + "outputs": [], "source": [ "# Wait for job to complete (with rich visual feedback)\n", "execution.wait(poll=30, timeout=3600)\n", @@ -1506,194 +232,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 16:21:42] INFO     S3 bucket: mufi-test-serverless-smtj, prefix: eval           show_results_utils.py:130\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:21:42]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m S3 bucket: mufi-test-serverless-smtj, prefix: eval \u001b]8;id=425698;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=639097;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#130\u001b\\\u001b[2m130\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Extracted training job name:                                  show_results_utils.py:63\n",
-       "                             pipelines-u2q2dl1w5aiq-EvaluateCustomModel-FNSg2Knqlf from                            \n",
-       "                             step: EvaluateCustomModel                                                             \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted training job name: \u001b]8;id=993672;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=652226;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#63\u001b\\\u001b[2m63\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-u2q2dl1w5aiq-EvaluateCustomModel-FNSg2Knqlf from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m step: EvaluateCustomModel \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Searching for results_*.json in                              show_results_utils.py:150\n",
-       "                             s3://mufi-test-serverless-smtj/eval/pipelines-u2q2dl1w5aiq-E                          \n",
-       "                             valuateCustomModel-FNSg2Knqlf/output/output/                                          \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for results_*.json in \u001b]8;id=724854;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=324888;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/pipelines-u2q2dl1w5aiq-E\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mvaluateCustomModel-FNSg2Knqlf/output/output/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Found results file:                                          show_results_utils.py:168\n",
-       "                             eval/pipelines-u2q2dl1w5aiq-EvaluateCustomModel-FNSg2Knqlf/o                          \n",
-       "                             utput/output/eval-meta_textgeneration_llama_3_2_1b_instruct-                          \n",
-       "                             -or8pa/eval_results/results_2025-11-29T23-46-45.108093+00-00                          \n",
-       "                             .json                                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found results file: \u001b]8;id=770358;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=338226;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#168\u001b\\\u001b[2m168\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m eval/pipelines-u2q2dl1w5aiq-EvaluateCustomModel-FNSg2Knqlf/o \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m utput/output/eval-meta_textgeneration_llama_3_2_1b_instruct- \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -or8pa/eval_results/results_2025-\u001b[1;36m11\u001b[0m-29T23-\u001b[1;36m46\u001b[0m-\u001b[1;36m45.108093\u001b[0m+\u001b[1;36m00-00\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1;36m.j\u001b[0mson \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:21:43] INFO     Using metrics from key: 'custom|gen_qa_gen_qa|0' (gen_qa or  show_results_utils.py:100\n",
-       "                             custom_scorer format)                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:21:43]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using metrics from key: \u001b[38;2;0;135;0m'custom|gen_qa_gen_qa|0'\u001b[0m \u001b[1m(\u001b[0mgen_qa or \u001b]8;id=904034;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=137242;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#100\u001b\\\u001b[2m100\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m custom_scorer format\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                Custom Model Results                \n",
-       "╭────────────────────────────────┬─────────────────╮\n",
-       "│ Metric                                    Value │\n",
-       "├────────────────────────────────┼─────────────────┤\n",
-       "│ bleu                                     6.6928 │\n",
-       "│ bleu_stderr                              0.7769 │\n",
-       "│ byoc_failure_count                    3572.0000 │\n",
-       "│ em                                        1.26% │\n",
-       "│ em_stderr                                0.0019 │\n",
-       "│ f1                                       19.13% │\n",
-       "│ f1_score_quasi                           25.29% │\n",
-       "│ f1_score_quasi_stderr                    0.0049 │\n",
-       "│ f1_stderr                                0.0047 │\n",
-       "│ qem                                       2.21% │\n",
-       "│ qem_stderr                               0.0025 │\n",
-       "│ rouge1                                   25.73% │\n",
-       "│ rouge1_stderr                            0.0047 │\n",
-       "│ rouge2                                   19.15% │\n",
-       "│ rouge2_stderr                            0.0047 │\n",
-       "│ rougeL                                   25.04% │\n",
-       "│ rougeL_stderr                            0.0047 │\n",
-       "╰────────────────────────────────┴─────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3;32mCustom Model Results\u001b[0m\u001b[3m \u001b[0m\n", - "╭────────────────────────────────┬─────────────────╮\n", - "│\u001b[1;32m \u001b[0m\u001b[1;32mMetric \u001b[0m\u001b[1;32m \u001b[0m│\u001b[1;32m \u001b[0m\u001b[1;32m Value\u001b[0m\u001b[1;32m \u001b[0m│\n", - "├────────────────────────────────┼─────────────────┤\n", - "│\u001b[36m \u001b[0m\u001b[36mbleu \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 6.6928\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mbleu_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.7769\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mbyoc_failure_count \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 3572.0000\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mem \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 1.26%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mem_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0019\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 19.13%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_score_quasi \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.29%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_score_quasi_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0049\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mqem \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 2.21%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mqem_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0025\u001b[0m\u001b[37m 
\u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge1 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.73%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge1_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge2 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 19.15%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge2_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrougeL \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.04%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrougeL_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "╰────────────────────────────────┴─────────────────╯\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
╭─────────────────────────────────────────── Result Artifacts Location ───────────────────────────────────────────╮\n",
-       "                                                                                                                 \n",
-       "                                                                                                                 \n",
-       "  📦 Full evaluation artifacts available at:                                                                     \n",
-       "                                                                                                                 \n",
-       "  Custom Model:                                                                                                  \n",
-       "    s3://mufi-test-serverless-smtj/eval/pipelines-u2q2dl1w5aiq-EvaluateCustomModel-FNSg2Knqlf/output/output/Non  \n",
-       "  e/eval_results/                                                                                                \n",
-       "                                                                                                                 \n",
-       "                                                                                                                 \n",
-       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[34m╭─\u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34mResult Artifacts Location\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m📦 \u001b[0m\u001b[1mFull evaluation artifacts available at:\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;32mCustom Model:\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m s3://mufi-test-serverless-smtj/eval/pipelines-u2q2dl1w5aiq-EvaluateCustomModel-FNSg2Knqlf/output/output/Non\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36me/eval_results/\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# show results\n", "execution.show_results()" @@ -1710,25 +251,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO - sagemaker.modules.evaluate.execution - Extracted s3_output_path from training job pipelines-amlk8q2ukw8x-EvaluateCustomModel-VElzvyVY19: s3://mufi-test-serverless-smtj/eval/\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Retrieved job: amlk8q2ukw8x\n", - "Status: Succeeded\n" - ] - } - ], + "outputs": [], "source": [ "from sagemaker.train.evaluate import EvaluationPipelineExecution\n", "\n", @@ -1752,18 +277,9 @@ }, { "cell_type": "code", - "execution_count": 10, + 
"execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found 0 custom scorer evaluation(s):\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "# Get all custom scorer evaluations\n", "all_executions = list(CustomScorerEvaluator.get_all())\n", diff --git a/sagemaker-train/example_notebooks/evaluate/llm_as_judge_demo.ipynb b/sagemaker-train/example_notebooks/evaluate/llm_as_judge_demo.ipynb index 8ba50c3ae7..ab76f46a9c 100644 --- a/sagemaker-train/example_notebooks/evaluate/llm_as_judge_demo.ipynb +++ b/sagemaker-train/example_notebooks/evaluate/llm_as_judge_demo.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -90,7 +90,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -127,93 +127,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:43:52] INFO     Found credentials in shared credentials file: ~/.aws/credentials   credentials.py:1364\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:43:52]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=406523;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=534480;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/botocore/credentials.py#1364\u001b\\\u001b[2m1364\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /Users/mufi/Library/Application Support/sagemaker/config.yaml\n" - ] - }, - { - "data": { - "text/html": [ - "
                    INFO     Resolved MLflow resource ARN:                                    base_evaluator.py:113\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/                      \n",
-       "                             mmlu-eval-experiment                                                                  \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved MLflow resource ARN: \u001b]8;id=360312;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=805617;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#113\u001b\\\u001b[2m113\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/ \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m mmlu-eval-experiment \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
LLMAsJudgeEvaluator(\n",
-       "region=None,\n",
-       "sagemaker_session=<sagemaker.core.helper.session_helper.Session object at 0x15f5c11c0>,\n",
-       "model='arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28',\n",
-       "base_eval_name='eval-meta-04295d90',\n",
-       "s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n",
-       "mlflow_resource_arn='arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment',\n",
-       "mlflow_experiment_name=None,\n",
-       "mlflow_run_name=None,\n",
-       "networking=None,\n",
-       "kms_key_id=None,\n",
-       "model_package_group=None,\n",
-       "evaluator_model='anthropic.claude-3-5-haiku-20241022-v1:0',\n",
-       "dataset='s3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-dataset/gen_qa.jsonl',\n",
-       "builtin_metrics=['Completeness', 'Faithfulness'],\n",
-       "custom_metrics='[{\"customMetricDefinition\": {\"name\": \"PositiveSentiment\", \"instructions\": \"You are an expert evaluator. Your task is to assess if the sentiment of the response is positive. Rate the response based on whether it conveys positive sentiment, helpfulness, and constructive tone.\\\\n\\\\nConsider the following:\\\\n- Does the response have a positive, encouraging tone?\\\\n- Is the response helpful and constructive?\\\\n- Does it avoid negative language or criticism?\\\\n\\\\nRate on this scale:\\\\n- Good: Response has positive sentiment\\\\n- Poor: Response lacks positive sentiment\\\\n\\\\nHere is the actual task:\\\\nPrompt: {{prompt}}\\\\nResponse: {{prediction}}\", \"ratingScale\": [{\"definition\": \"Good\", \"value\": {\"floatValue\": 1}}, {\"definition\": \"Poor\", \"value\": {\"floatValue\": 0}}]}}]',\n",
-       "evaluate_base_model=False\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mLLMAsJudgeEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mregion\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msagemaker_session\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225msagemaker.core.helper.session_helper.Session\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x15f5c11c0\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbase_eval_name\u001b[0m=\u001b[38;2;0;135;0m'eval-meta-04295d90'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m=\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_resource_arn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_experiment_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_run_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mnetworking\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mkms_key_id\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel_package_group\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mevaluator_model\u001b[0m=\u001b[38;2;0;135;0m'anthropic.claude-3-5-haiku-20241022-v1:0'\u001b[0m,\n", - "\u001b[2;32m│ 
\u001b[0m\u001b[38;2;215;175;0mdataset\u001b[0m=\u001b[38;2;0;135;0m's3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-dataset/gen_qa.jsonl'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbuiltin_metrics\u001b[0m=\u001b[1m[\u001b[0m\u001b[38;2;0;135;0m'Completeness'\u001b[0m, \u001b[38;2;0;135;0m'Faithfulness'\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mcustom_metrics\u001b[0m=\u001b[38;2;0;135;0m'\u001b[0m\u001b[1;38;2;0;135;0m[\u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0m\"customMetricDefinition\": \u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0m\"name\": \"PositiveSentiment\", \"instructions\": \"You are an expert evaluator. Your task is to assess if the sentiment of the response is positive. Rate the response based on whether it conveys positive sentiment, helpfulness, and constructive tone.\\\\n\\\\nConsider the following:\\\\n- Does the response have a positive, encouraging tone?\\\\n- Is the response helpful and constructive?\\\\n- Does it avoid negative language or criticism?\\\\n\\\\nRate on this scale:\\\\n- Good: Response has positive sentiment\\\\n- Poor: Response lacks positive sentiment\\\\n\\\\nHere is the actual task:\\\\nPrompt: \u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0mprompt\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[38;2;0;135;0m\\\\nResponse: \u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0mprediction\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[38;2;0;135;0m\", \"ratingScale\": \u001b[0m\u001b[1;38;2;0;135;0m[\u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0m\"definition\": \"Good\", \"value\": \u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0m\"floatValue\": 1\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[38;2;0;135;0m, 
\u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0m\"definition\": \"Poor\", \"value\": \u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0m\"floatValue\": 0\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m]\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m]\u001b[0m\u001b[38;2;0;135;0m'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mevaluate_base_model\u001b[0m=\u001b[3;38;2;215;0;0mFalse\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "\n", "# Create evaluator with custom metrics\n", @@ -242,7 +156,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -303,7 +217,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -342,1122 +256,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 16:22:01] INFO     SageMaker Python SDK will collect telemetry to help us better  telemetry_logging.py:91\n",
-       "                             understand our user's needs, diagnose issues, and deliver                             \n",
-       "                             additional features.                                                                  \n",
-       "                             To opt out of telemetry, please disable via TelemetryOptOut                           \n",
-       "                             parameter in SDK defaults config. For more information, refer                         \n",
-       "                             to                                                                                    \n",
-       "                             https://sagemaker.readthedocs.io/en/stable/overview.html#confi                        \n",
-       "                             guring-and-using-defaults-with-the-sagemaker-python-sdk.                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:22:01]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=931878;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=760856;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Getting or creating artifact for source:                         base_evaluator.py:597\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine                      \n",
-       "                             tuned-models-gamma/28                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Getting or creating artifact for source: \u001b]8;id=179503;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=71430;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#597\u001b\\\u001b[2m597\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Searching for existing artifact for model package:               base_evaluator.py:459\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine                      \n",
-       "                             tuned-models-gamma/28                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for existing artifact for model package: \u001b]8;id=2444;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=787547;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#459\u001b\\\u001b[2m459\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Found existing artifact:                                         base_evaluator.py:468\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3                      \n",
-       "                             138877d772ec489bef                                                                    \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found existing artifact: \u001b]8;id=808361;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=665812;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#468\u001b\\\u001b[2m468\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3 \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 138877d772ec489bef \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Inferred model package group ARN:                                base_evaluator.py:386\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes                      \n",
-       "                             t-finetuned-models-gamma from                                                         \n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine                      \n",
-       "                             tuned-models-gamma/28                                                                 \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Inferred model package group ARN: \u001b]8;id=361400;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=518747;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#386\u001b\\\u001b[2m386\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m t-finetuned-models-gamma from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Automatically inferred model_package_group:                      base_evaluator.py:421\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes                      \n",
-       "                             t-finetuned-models-gamma                                                              \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Automatically inferred model_package_group: \u001b]8;id=299761;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=867866;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#421\u001b\\\u001b[2m421\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m t-finetuned-models-gamma \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Using ModelPackage - model_package_group_arn:            llm_as_judge_evaluator.py:319\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package-g                              \n",
-       "                             roup/test-finetuned-models-gamma                                                      \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using ModelPackage - model_package_group_arn: \u001b]8;id=538256;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py\u001b\\\u001b[2mllm_as_judge_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=292230;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py#319\u001b\\\u001b[2m319\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-g \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m roup/test-finetuned-models-gamma \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Resolved model info - base_model_name:                   llm_as_judge_evaluator.py:322\n",
-       "                             meta-textgeneration-llama-3-2-1b-instruct,                                            \n",
-       "                             base_model_arn:                                                                       \n",
-       "                             arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPub                              \n",
-       "                             licHub/Model/meta-textgeneration-llama-3-2-1b-instruct/1                              \n",
-       "                             .10.0, source_model_package_arn:                                                      \n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:model-package/t                              \n",
-       "                             est-finetuned-models-gamma/28                                                         \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved model info - base_model_name: \u001b]8;id=854970;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py\u001b\\\u001b[2mllm_as_judge_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=553794;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py#322\u001b\\\u001b[2m322\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m base_model_arn: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPub \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m licHub/Model/meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct/\u001b[1;36m1\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1;36m.10\u001b[0m.\u001b[1;36m0\u001b[0m, source_model_package_arn: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/t \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m est-finetuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Uploading custom metrics to S3:                          llm_as_judge_evaluator.py:220\n",
-       "                             s3://mufi-test-serverless-smtj/eval/evaluationinputs/eva                              \n",
-       "                             l-meta-04295d9020251130-002201/custom-metrics.json                                    \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Uploading custom metrics to S3: \u001b]8;id=657021;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py\u001b\\\u001b[2mllm_as_judge_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=5404;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py#220\u001b\\\u001b[2m220\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/evaluationinputs/eva\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225ml-meta-04295d9020251130-002201/\u001b[0m\u001b[38;2;225;0;225mcustom-metrics.json\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Successfully uploaded custom metrics to:                 llm_as_judge_evaluator.py:228\n",
-       "                             s3://mufi-test-serverless-smtj/eval/evaluationinputs/eva                              \n",
-       "                             l-meta-04295d9020251130-002201/custom-metrics.json                                    \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Successfully uploaded custom metrics to: \u001b]8;id=718083;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py\u001b\\\u001b[2mllm_as_judge_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=581773;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py#228\u001b\\\u001b[2m228\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/evaluationinputs/eva\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225ml-meta-04295d9020251130-002201/\u001b[0m\u001b[38;2;225;0;225mcustom-metrics.json\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Using full template for ModelPackage                             base_evaluator.py:655\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using full template for ModelPackage \u001b]8;id=143249;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=489338;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#655\u001b\\\u001b[2m655\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Resolved template parameters: {'role_arn':                       base_evaluator.py:693\n",
-       "                             'arn:aws:iam::052150106756:role/Admin', 'mlflow_resource_arn':                        \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server                      \n",
-       "                             /mmlu-eval-experiment', 'mlflow_experiment_name': None,                               \n",
-       "                             'mlflow_run_name': None, 'model_package_group_arn':                                   \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te                      \n",
-       "                             st-finetuned-models-gamma', 'source_model_package_arn':                               \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin                      \n",
-       "                             etuned-models-gamma/28', 'base_model_arn':                                            \n",
-       "                             'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/                      \n",
-       "                             Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0',                              \n",
-       "                             's3_output_path': 's3://mufi-test-serverless-smtj/eval',                              \n",
-       "                             'dataset_artifact_arn':                                                               \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b                      \n",
-       "                             3138877d772ec489bef', 'action_arn_prefix':                                            \n",
-       "                             'arn:aws:sagemaker:us-west-2:052150106756:action',                                    \n",
-       "                             'dataset_uri':                                                                        \n",
-       "                             's3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-datas                      \n",
-       "                             et/gen_qa.jsonl', 'judge_model_id':                                                   \n",
-       "                             'anthropic.claude-3-5-haiku-20241022-v1:0', 'llmaj_metrics':                          \n",
-       "                             '[\"Completeness\", \"Faithfulness\"]', 'custom_metrics_s3_path':                         \n",
-       "                             's3://mufi-test-serverless-smtj/eval/evaluationinputs/eval-meta-                      \n",
-       "                             04295d9020251130-002201/custom-metrics.json', 'max_new_tokens':                       \n",
-       "                             '8192', 'temperature': '0', 'top_k': '-1', 'top_p': '1.0',                            \n",
-       "                             'pipeline_name': 'SagemakerModelEvaluationType2-llmaj',                               \n",
-       "                             'evaluate_base_model': False}                                                         \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved template parameters: \u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'role_arn'\u001b[0m: \u001b]8;id=109479;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=566018;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#693\u001b\\\u001b[2m693\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:iam::052150106756:role/Admin'\u001b[0m, \u001b[38;2;0;135;0m'mlflow_resource_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m/mmlu-eval-experiment'\u001b[0m, \u001b[38;2;0;135;0m'mlflow_experiment_name'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'mlflow_run_name'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m, \u001b[38;2;0;135;0m'model_package_group_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mst-finetuned-models-gamma'\u001b[0m, \u001b[38;2;0;135;0m'source_model_package_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28'\u001b[0m, \u001b[38;2;0;135;0m'base_model_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3_output_path'\u001b[0m: \u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'dataset_artifact_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m3138877d772ec489bef'\u001b[0m, \u001b[38;2;0;135;0m'action_arn_prefix'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:action'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'dataset_uri'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-datas\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0met/gen_qa.jsonl'\u001b[0m, \u001b[38;2;0;135;0m'judge_model_id'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'anthropic.claude-3-5-haiku-20241022-v1:0'\u001b[0m, \u001b[38;2;0;135;0m'llmaj_metrics'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'\u001b[0m\u001b[1;38;2;0;135;0m[\u001b[0m\u001b[38;2;0;135;0m\"Completeness\", \"Faithfulness\"\u001b[0m\u001b[1;38;2;0;135;0m]\u001b[0m\u001b[38;2;0;135;0m'\u001b[0m, \u001b[38;2;0;135;0m'custom_metrics_s3_path'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/evaluationinputs/eval-meta-\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m04295d9020251130-002201/custom-metrics.json'\u001b[0m, \u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'8192'\u001b[0m, \u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m, \u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[38;2;0;135;0m'-1'\u001b[0m, \u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[38;2;0;135;0m'1.0'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'pipeline_name'\u001b[0m: \u001b[38;2;0;135;0m'SagemakerModelEvaluationType2-llmaj'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'evaluate_base_model'\u001b[0m: \u001b[3;38;2;215;0;0mFalse\u001b[0m\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Rendered pipeline definition:                                    base_evaluator.py:702\n",
-       "                             {                                                                                     \n",
-       "                               \"Version\": \"2020-12-01\",                                                            \n",
-       "                               \"Metadata\": {},                                                                     \n",
-       "                               \"MlflowConfig\": {                                                                   \n",
-       "                                 \"MlflowResourceArn\":                                                              \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server                      \n",
-       "                             /mmlu-eval-experiment\"                                                                \n",
-       "                               },                                                                                  \n",
-       "                               \"Parameters\": [],                                                                   \n",
-       "                               \"Steps\": [                                                                          \n",
-       "                                 {                                                                                 \n",
-       "                                   \"Name\": \"CreateEvaluationAction\",                                               \n",
-       "                                   \"Type\": \"Lineage\",                                                              \n",
-       "                                   \"Arguments\": {                                                                  \n",
-       "                                     \"Actions\": [                                                                  \n",
-       "                                       {                                                                           \n",
-       "                                         \"ActionName\": {                                                           \n",
-       "                                           \"Get\": \"Execution.PipelineExecutionId\"                                  \n",
-       "                                         },                                                                        \n",
-       "                                         \"ActionType\": \"Evaluation\",                                               \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"SourceUri\":                                                            \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin                      \n",
-       "                             etuned-models-gamma/28\",                                                              \n",
-       "                                           \"SourceType\": \"ModelPackage\"                                            \n",
-       "                                         },                                                                        \n",
-       "                                         \"Properties\": {                                                           \n",
-       "                                           \"PipelineExecutionArn\": {                                               \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionArn\"                               \n",
-       "                                           },                                                                      \n",
-       "                                           \"PipelineName\":                                                         \n",
-       "                             \"SagemakerModelEvaluationType2-llmaj\"                                                 \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ],                                                                            \n",
-       "                                     \"Contexts\": [                                                                 \n",
-       "                                       {                                                                           \n",
-       "                                         \"ContextName\": {                                                          \n",
-       "                                           \"Get\": \"Execution.PipelineExecutionId\"                                  \n",
-       "                                         },                                                                        \n",
-       "                                         \"ContextType\": \"PipelineExecution\",                                       \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"SourceUri\": {                                                          \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionArn\"                               \n",
-       "                                           }                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ],                                                                            \n",
-       "                                     \"Associations\": [                                                             \n",
-       "                                       {                                                                           \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"Name\": {                                                               \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionId\"                                \n",
-       "                                           },                                                                      \n",
-       "                                           \"Type\": \"Action\"                                                        \n",
-       "                                         },                                                                        \n",
-       "                                         \"Destination\": {                                                          \n",
-       "                                           \"Name\": {                                                               \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionId\"                                \n",
-       "                                           },                                                                      \n",
-       "                                           \"Type\": \"Context\"                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"AssociationType\": \"ContributedTo\"                                        \n",
-       "                                       },                                                                          \n",
-       "                                       {                                                                           \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"Arn\":                                                                  \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b                      \n",
-       "                             3138877d772ec489bef\"                                                                  \n",
-       "                                         },                                                                        \n",
-       "                                         \"Destination\": {                                                          \n",
-       "                                           \"Arn\": {                                                                \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"/\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 \"arn:aws:sagemaker:us-west-2:052150106756:ac                      \n",
-       "                             tion\",                                                                                \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 }                                                                 \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"AssociationType\": \"ContributedTo\"                                        \n",
-       "                                       }                                                                           \n",
-       "                                     ]                                                                             \n",
-       "                                   }                                                                               \n",
-       "                                 },                                                                                \n",
-       "                                 {                                                                                 \n",
-       "                                   \"Name\": \"EvaluateCustomInferenceModel\",                                         \n",
-       "                                   \"Type\": \"Training\",                                                             \n",
-       "                                   \"Arguments\": {                                                                  \n",
-       "                                     \"TrainingJobName\": \"CustomInference\",                                         \n",
-       "                                     \"RoleArn\": \"arn:aws:iam::052150106756:role/Admin\",                            \n",
-       "                                     \"ServerlessJobConfig\": {                                                      \n",
-       "                                       \"BaseModelArn\":                                                             \n",
-       "                             \"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/                      \n",
-       "                             Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\",                              \n",
-       "                                       \"AcceptEula\": true,                                                         \n",
-       "                                       \"JobType\": \"Evaluation\",                                                    \n",
-       "                                       \"EvaluationType\": \"BenchmarkEvaluation\"                                     \n",
-       "                                     },                                                                            \n",
-       "                                     \"StoppingCondition\": {                                                        \n",
-       "                                       \"MaxRuntimeInSeconds\": 86400                                                \n",
-       "                                     },                                                                            \n",
-       "                                     \"HyperParameters\": {                                                          \n",
-       "                                       \"name\": \"CustomInference\",                                                  \n",
-       "                                       \"task\": \"inference_only\"                                                    \n",
-       "                                     },                                                                            \n",
-       "                                     \"OutputDataConfig\": {                                                         \n",
-       "                                       \"S3OutputPath\": \"s3://mufi-test-serverless-smtj/eval\",                      \n",
-       "                                       \"CompressionType\": \"NONE\"                                                   \n",
-       "                                     },                                                                            \n",
-       "                                     \"ModelPackageConfig\": {                                                       \n",
-       "                                       \"ModelPackageGroupArn\":                                                     \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te                      \n",
-       "                             st-finetuned-models-gamma\",                                                           \n",
-       "                                       \"SourceModelPackageArn\":                                                    \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin                      \n",
-       "                             etuned-models-gamma/28\"                                                               \n",
-       "                                     },                                                                            \n",
-       "                                     \"InputDataConfig\": [                                                          \n",
-       "                                       {                                                                           \n",
-       "                                         \"ChannelName\": \"train\",                                                   \n",
-       "                                         \"DataSource\": {                                                           \n",
-       "                                           \"S3DataSource\": {                                                       \n",
-       "                                             \"S3DataType\": \"S3Prefix\",                                             \n",
-       "                                             \"S3Uri\":                                                              \n",
-       "                             \"s3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-datas                      \n",
-       "                             et/gen_qa.jsonl\"                                                                      \n",
-       "                                           }                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ]                                                                             \n",
-       "                                   },                                                                              \n",
-       "                                   \"DependsOn\": [                                                                  \n",
-       "                                     \"CreateEvaluationAction\"                                                      \n",
-       "                                   ]                                                                               \n",
-       "                                 },                                                                                \n",
-       "                                 {                                                                                 \n",
-       "                                   \"Name\": \"EvaluateCustomModelMetrics\",                                           \n",
-       "                                   \"Type\": \"Training\",                                                             \n",
-       "                                   \"DependsOn\": [                                                                  \n",
-       "                                     \"EvaluateCustomInferenceModel\"                                                \n",
-       "                                   ],                                                                              \n",
-       "                                   \"Arguments\": {                                                                  \n",
-       "                                     \"TrainingJobName\": {                                                          \n",
-       "                                       \"Std:Join\": {                                                               \n",
-       "                                         \"On\": \"-\",                                                                \n",
-       "                                         \"Values\": [                                                               \n",
-       "                                           \"custom-llmaj-eval\",                                                    \n",
-       "                                           {                                                                       \n",
-       "                                             \"Get\": \"Execution.PipelineExecutionId\"                                \n",
-       "                                           }                                                                       \n",
-       "                                         ]                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     },                                                                            \n",
-       "                                     \"RoleArn\": \"arn:aws:iam::052150106756:role/Admin\",                            \n",
-       "                                     \"ServerlessJobConfig\": {                                                      \n",
-       "                                       \"BaseModelArn\":                                                             \n",
-       "                             \"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/                      \n",
-       "                             Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\",                              \n",
-       "                                       \"AcceptEula\": true,                                                         \n",
-       "                                       \"JobType\": \"Evaluation\",                                                    \n",
-       "                                       \"EvaluationType\": \"LLMAJEvaluation\"                                         \n",
-       "                                     },                                                                            \n",
-       "                                     \"StoppingCondition\": {                                                        \n",
-       "                                       \"MaxRuntimeInSeconds\": 86400                                                \n",
-       "                                     },                                                                            \n",
-       "                                     \"HyperParameters\": {                                                          \n",
-       "                                       \"name\": {                                                                   \n",
-       "                                         \"Std:Join\": {                                                             \n",
-       "                                           \"On\": \"-\",                                                              \n",
-       "                                           \"Values\": [                                                             \n",
-       "                                             \"custom-llmaj-eval\",                                                  \n",
-       "                                             {                                                                     \n",
-       "                                               \"Get\": \"Execution.PipelineExecutionId\"                              \n",
-       "                                             }                                                                     \n",
-       "                                           ]                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       },                                                                          \n",
-       "                                       \"judge_model_id\":                                                           \n",
-       "                             \"anthropic.claude-3-5-haiku-20241022-v1:0\",                                           \n",
-       "                                       \"inference_data_s3_path\": {                                                 \n",
-       "                                         \"Std:Join\": {                                                             \n",
-       "                                           \"On\": \"\",                                                               \n",
-       "                                           \"Values\": [                                                             \n",
-       "                                             {                                                                     \n",
-       "                                               \"Get\":                                                              \n",
-       "                             \"Steps.EvaluateCustomInferenceModel.OutputDataConfig.S3OutputPat                      \n",
-       "                             h\"                                                                                    \n",
-       "                                             },                                                                    \n",
-       "                                             \"/\",                                                                  \n",
-       "                                             {                                                                     \n",
-       "                                               \"Get\":                                                              \n",
-       "                             \"Steps.EvaluateCustomInferenceModel.TrainingJobName\"                                  \n",
-       "                                             },                                                                    \n",
-       "                                             \"/output/output/\",                                                    \n",
-       "                                             \"CustomInference\",                                                    \n",
-       "                                             \"/eval_results/inference_output.jsonl\"                                \n",
-       "                                           ]                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       },                                                                          \n",
-       "                                       \"output_path\": \"s3://mufi-test-serverless-smtj/eval\",                       \n",
-       "                                       \"llmaj_metrics\": \"[\\\"Completeness\\\",                                        \n",
-       "                             \\\"Faithfulness\\\"]\",                                                                   \n",
-       "                                       \"custom_metrics_s3_path\":                                                   \n",
-       "                             \"s3://mufi-test-serverless-smtj/eval/evaluationinputs/eval-meta-                      \n",
-       "                             04295d9020251130-002201/custom-metrics.json\",                                         \n",
-       "                                       \"max_new_tokens\": \"8192\",                                                   \n",
-       "                                       \"temperature\": \"0\",                                                         \n",
-       "                                       \"top_k\": \"-1\",                                                              \n",
-       "                                       \"top_p\": \"1.0\"                                                              \n",
-       "                                     },                                                                            \n",
-       "                                     \"OutputDataConfig\": {                                                         \n",
-       "                                       \"S3OutputPath\": \"s3://mufi-test-serverless-smtj/eval\",                      \n",
-       "                                       \"CompressionType\": \"NONE\"                                                   \n",
-       "                                     },                                                                            \n",
-       "                                     \"ModelPackageConfig\": {                                                       \n",
-       "                                       \"ModelPackageGroupArn\":                                                     \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te                      \n",
-       "                             st-finetuned-models-gamma\",                                                           \n",
-       "                                       \"SourceModelPackageArn\":                                                    \n",
-       "                             \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin                      \n",
-       "                             etuned-models-gamma/28\"                                                               \n",
-       "                                     }                                                                             \n",
-       "                                   }                                                                               \n",
-       "                                 },                                                                                \n",
-       "                                 {                                                                                 \n",
-       "                                   \"Name\": \"AssociateLineage\",                                                     \n",
-       "                                   \"Type\": \"Lineage\",                                                              \n",
-       "                                   \"DependsOn\": [                                                                  \n",
-       "                                     \"CreateEvaluationAction\"                                                      \n",
-       "                                   ],                                                                              \n",
-       "                                   \"Arguments\": {                                                                  \n",
-       "                                     \"Artifacts\": [                                                                \n",
-       "                                       {                                                                           \n",
-       "                                         \"ArtifactName\": {                                                         \n",
-       "                                           \"Std:Join\": {                                                           \n",
-       "                                             \"On\": \"-\",                                                            \n",
-       "                                             \"Values\": [                                                           \n",
-       "                                               {                                                                   \n",
-       "                                                 \"Get\": \"Execution.PipelineExecutionId\"                            \n",
-       "                                               },                                                                  \n",
-       "                                               \"custom-inference-results\"                                          \n",
-       "                                             ]                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"ArtifactType\": \"InferenceResults\",                                       \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"SourceUri\": {                                                          \n",
-       "                                             \"Get\":                                                                \n",
-       "                             \"Steps.EvaluateCustomInferenceModel.OutputDataConfig.S3OutputPat                      \n",
-       "                             h\"                                                                                    \n",
-       "                                           }                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       },                                                                          \n",
-       "                                       {                                                                           \n",
-       "                                         \"ArtifactName\": {                                                         \n",
-       "                                           \"Std:Join\": {                                                           \n",
-       "                                             \"On\": \"-\",                                                            \n",
-       "                                             \"Values\": [                                                           \n",
-       "                                               {                                                                   \n",
-       "                                                 \"Get\": \"Execution.PipelineExecutionId\"                            \n",
-       "                                               },                                                                  \n",
-       "                                               \"custom-eval-report\"                                                \n",
-       "                                             ]                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"ArtifactType\": \"EvaluationReport\",                                       \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"SourceUri\": {                                                          \n",
-       "                                             \"Get\":                                                                \n",
-       "                             \"Steps.EvaluateCustomModelMetrics.OutputDataConfig.S3OutputPath\"                      \n",
-       "                                           }                                                                       \n",
-       "                                         }                                                                         \n",
-       "                                       }                                                                           \n",
-       "                                     ],                                                                            \n",
-       "                                     \"Associations\": [                                                             \n",
-       "                                       {                                                                           \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"Name\": {                                                               \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"-\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 },                                                                \n",
-       "                                                 \"custom-inference-results\"                                        \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           },                                                                      \n",
-       "                                           \"Type\": \"Artifact\"                                                      \n",
-       "                                         },                                                                        \n",
-       "                                         \"Destination\": {                                                          \n",
-       "                                           \"Arn\": {                                                                \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"/\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 \"arn:aws:sagemaker:us-west-2:052150106756:ac                      \n",
-       "                             tion\",                                                                                \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 }                                                                 \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"AssociationType\": \"ContributedTo\"                                        \n",
-       "                                       },                                                                          \n",
-       "                                       {                                                                           \n",
-       "                                         \"Source\": {                                                               \n",
-       "                                           \"Name\": {                                                               \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"-\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 },                                                                \n",
-       "                                                 \"custom-eval-report\"                                              \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           },                                                                      \n",
-       "                                           \"Type\": \"Artifact\"                                                      \n",
-       "                                         },                                                                        \n",
-       "                                         \"Destination\": {                                                          \n",
-       "                                           \"Arn\": {                                                                \n",
-       "                                             \"Std:Join\": {                                                         \n",
-       "                                               \"On\": \"/\",                                                          \n",
-       "                                               \"Values\": [                                                         \n",
-       "                                                 \"arn:aws:sagemaker:us-west-2:052150106756:ac                      \n",
-       "                             tion\",                                                                                \n",
-       "                                                 {                                                                 \n",
-       "                                                   \"Get\": \"Execution.PipelineExecutionId\"                          \n",
-       "                                                 }                                                                 \n",
-       "                                               ]                                                                   \n",
-       "                                             }                                                                     \n",
-       "                                           }                                                                       \n",
-       "                                         },                                                                        \n",
-       "                                         \"AssociationType\": \"ContributedTo\"                                        \n",
-       "                                       }                                                                           \n",
-       "                                     ]                                                                             \n",
-       "                                   }                                                                               \n",
-       "                                 }                                                                                 \n",
-       "                               ]                                                                                   \n",
-       "                             }                                                                                     \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Rendered pipeline definition: \u001b]8;id=358999;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=565177;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#702\u001b\\\u001b[2m702\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Version\"\u001b[0m: \u001b[38;2;0;135;0m\"2020-12-01\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Metadata\"\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MlflowConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MlflowResourceArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m/mmlu-eval-experiment\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Parameters\"\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Lineage\"\u001b[0m, 
\u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Actions\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ActionName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ActionType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceType\"\u001b[0m: \u001b[38;2;0;135;0m\"ModelPackage\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Properties\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"PipelineExecutionArn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionArn\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"PipelineName\"\u001b[0m: 
\u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SagemakerModelEvaluationType2-llmaj\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Contexts\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ContextName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ContextType\"\u001b[0m: \u001b[38;2;0;135;0m\"PipelineExecution\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionArn\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Associations\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m 
\u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Action\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Context\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m3138877d772ec489bef\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluateCustomInferenceModel\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m 
\u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Training\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"TrainingJobName\"\u001b[0m: \u001b[38;2;0;135;0m\"CustomInference\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"RoleArn\"\u001b[0m: \u001b[38;2;0;135;0m\"arn:aws:iam::052150106756:role/Admin\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ServerlessJobConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"BaseModelArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AcceptEula\"\u001b[0m: true, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"JobType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluationType\"\u001b[0m: \u001b[38;2;0;135;0m\"BenchmarkEvaluation\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"StoppingCondition\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MaxRuntimeInSeconds\"\u001b[0m: \u001b[1;36m86400\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"HyperParameters\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"name\"\u001b[0m: \u001b[38;2;0;135;0m\"CustomInference\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"task\"\u001b[0m: \u001b[38;2;0;135;0m\"inference_only\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"OutputDataConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3OutputPath\"\u001b[0m: \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CompressionType\"\u001b[0m: \u001b[38;2;0;135;0m\"NONE\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageGroupArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mst-finetuned-models-gamma\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceModelPackageArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"InputDataConfig\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ChannelName\"\u001b[0m: 
\u001b[38;2;0;135;0m\"train\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataType\"\u001b[0m: \u001b[38;2;0;135;0m\"S3Prefix\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3Uri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-datas\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0met/gen_qa.jsonl\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DependsOn\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluateCustomModelMetrics\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Training\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DependsOn\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"EvaluateCustomInferenceModel\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"TrainingJobName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-llmaj-eval\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"RoleArn\"\u001b[0m: \u001b[38;2;0;135;0m\"arn:aws:iam::052150106756:role/Admin\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ServerlessJobConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"BaseModelArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\"\u001b[0m, \u001b[2m 
\u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AcceptEula\"\u001b[0m: true, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"JobType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluationType\"\u001b[0m: \u001b[38;2;0;135;0m\"LLMAJEvaluation\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"StoppingCondition\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MaxRuntimeInSeconds\"\u001b[0m: \u001b[1;36m86400\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"HyperParameters\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-llmaj-eval\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m 
\u001b[0m \u001b[38;2;0;135;0m\"judge_model_id\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"anthropic.claude-3-5-haiku-20241022-v1:0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"inference_data_s3_path\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateCustomInferenceModel.OutputDataConfig.S3OutputPat\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mh\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateCustomInferenceModel.TrainingJobName\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"/output/output/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CustomInference\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"/eval_results/inference_output.jsonl\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m 
\u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"output_path\"\u001b[0m: \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"llmaj_metrics\"\u001b[0m: \u001b[38;2;0;135;0m\"\u001b[0m\u001b[1;38;2;0;135;0m[\u001b[0m\u001b[38;2;0;135;0m\\\"Completeness\\\", \u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\\\"Faithfulness\\\"\u001b[0m\u001b[1;38;2;0;135;0m]\u001b[0m\u001b[38;2;0;135;0m\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom_metrics_s3_path\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval/evaluationinputs/eval-meta-\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m04295d9020251130-002201/custom-metrics.json\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_new_tokens\"\u001b[0m: \u001b[38;2;0;135;0m\"8192\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"temperature\"\u001b[0m: \u001b[38;2;0;135;0m\"0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_k\"\u001b[0m: \u001b[38;2;0;135;0m\"-1\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_p\"\u001b[0m: \u001b[38;2;0;135;0m\"1.0\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"OutputDataConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3OutputPath\"\u001b[0m: \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[38;2;0;135;0m\"CompressionType\"\u001b[0m: \u001b[38;2;0;135;0m\"NONE\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageGroupArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mst-finetuned-models-gamma\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceModelPackageArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"AssociateLineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Lineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DependsOn\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m 
\u001b[0m \u001b[38;2;0;135;0m\"Artifacts\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-inference-results\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactType\"\u001b[0m: \u001b[38;2;0;135;0m\"InferenceResults\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateCustomInferenceModel.OutputDataConfig.S3OutputPat\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mh\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m 
\u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactType\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluationReport\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateCustomModelMetrics.OutputDataConfig.S3OutputPath\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Associations\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-inference-results\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Artifact\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m 
\u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 
\u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Artifact\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - 
"\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:22:02] INFO     Found existing pipeline:                                              execution.py:199\n",
-       "                             SagemakerEvaluation-LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c                 \n",
-       "                             6e9                                                                                   \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:22:02]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found existing pipeline: \u001b]8;id=729179;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=511166;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#199\u001b\\\u001b[2m199\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-LLMAJEvaluation-\u001b[93mf952b79f-4afe-4f2f-b45d-17894533c\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m6e9\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Updating pipeline                                                     execution.py:202\n",
-       "                             SagemakerEvaluation-LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c                 \n",
-       "                             6e9 with latest definition                                                            \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Updating pipeline \u001b]8;id=567297;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=249002;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#202\u001b\\\u001b[2m202\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-LLMAJEvaluation-\u001b[93mf952b79f-4afe-4f2f-b45d-17894533c\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m6e9\u001b[0m with latest definition \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Updating pipeline resource.                                         resources.py:30306\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Updating pipeline resource. \u001b]8;id=897054;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=497721;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py#30306\u001b\\\u001b[2m30306\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:22:03] INFO     Successfully updated pipeline:                                        execution.py:208\n",
-       "                             SagemakerEvaluation-LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c                 \n",
-       "                             6e9                                                                                   \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:22:03]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Successfully updated pipeline: \u001b]8;id=916795;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=385336;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#208\u001b\\\u001b[2m208\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-LLMAJEvaluation-\u001b[93mf952b79f-4afe-4f2f-b45d-17894533c\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m6e9\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Starting pipeline execution: eval-meta-04295d90-1764462123            execution.py:263\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Starting pipeline execution: eval-meta-04295d90-\u001b[1;36m1764462123\u001b[0m \u001b]8;id=41189;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=464412;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#263\u001b\\\u001b[2m263\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Pipeline execution started:                                           execution.py:274\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation                 \n",
-       "                             -LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c6e9/execution/m318n                 \n",
-       "                             ngjk32f                                                                               \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Pipeline execution started: \u001b]8;id=227887;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=844359;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#274\u001b\\\u001b[2m274\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -LLMAJEvaluation-\u001b[93mf952b79f-4afe-4f2f-b45d-17894533c6e9\u001b[0m/execution/m318n \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m ngjk32f \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "✅ Evaluation job started!\n", - "Job ARN: arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c6e9/execution/m318nngjk32f\n", - "Job Name: eval-meta-04295d90\n", - "Status: Executing\n" - ] - }, - { - "data": { - "text/html": [ - "
LLMAJEvaluationExecution(\n",
-       "arn='arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c6e9/execution/m318nngjk32f',\n",
-       "name='eval-meta-04295d90',\n",
-       "status=PipelineExecutionStatus(overall_status='Executing', step_details=[], failure_reason=None),\n",
-       "last_modified_time=datetime.datetime(2025, 11, 29, 16, 22, 3, 689000, tzinfo=tzlocal()),\n",
-       "eval_type=<EvalType.LLM_AS_JUDGE: 'llmasjudge'>,\n",
-       "s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n",
-       "steps=[]\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mLLMAJEvaluationExecution\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c6e9/execution/m318nngjk32f'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'eval-meta-04295d90'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m, \u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mlast_modified_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m29\u001b[0m, \u001b[1;36m16\u001b[0m, \u001b[1;36m22\u001b[0m, \u001b[1;36m3\u001b[0m, \u001b[1;36m689000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0meval_type\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225mEvalType.LLM_AS_JUDGE:\u001b[0m\u001b[39m \u001b[0m\u001b[38;2;0;135;0m'llmasjudge'\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m=\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msteps\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Run 
evaluation\n", "execution = evaluator.evaluate()\n", @@ -1481,49 +282,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
PipelineExecutionStatus(\n",
-       "overall_status='Executing',\n",
-       "step_details=[\n",
-       "│   │   StepDetail(\n",
-       "│   │   │   name='CreateEvaluationAction',\n",
-       "│   │   │   status='Starting',\n",
-       "│   │   │   start_time='2025-11-29T16:22:04.148000-08:00',\n",
-       "│   │   │   end_time='<sagemaker.core.utils.utils.Unassigned object at 0x1298e7170>',\n",
-       "│   │   │   display_name=None,\n",
-       "│   │   │   failure_reason=None\n",
-       "│   │   )\n",
-       "],\n",
-       "failure_reason=None\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'CreateEvaluationAction'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Starting'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T16:22:04.148000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'\u001b[0m\u001b[1;38;2;0;135;0m<\u001b[0m\u001b[1;38;2;0;135;0msagemaker.core.utils.utils.Unassigned\u001b[0m\u001b[38;2;0;135;0m object at 0x1298e7170\u001b[0m\u001b[1;38;2;0;135;0m>\u001b[0m\u001b[38;2;0;135;0m'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Refresh status\n", "execution.refresh()\n", @@ -1543,58 +304,9 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
╭─────────────────────────────────────────── Pipeline Execution Status ───────────────────────────────────────────╮\n",
-       "  Overall Status        Succeeded                                                                                \n",
-       "  Target Status         Succeeded                                                                                \n",
-       "  Elapsed Time          1885.8s                                                                                  \n",
-       "                                                                                                                 \n",
-       " Pipeline Steps                                                                                                  \n",
-       "  Step Name                       Status           Duration                                                      \n",
-       "  AssociateLineage                Succeeded        1.9s                                                          \n",
-       "  EvaluateCustomModelMetrics      Succeeded        1327.1s                                                       \n",
-       "  EvaluateCustomInferenceModel    Succeeded        554.1s                                                        \n",
-       "  CreateEvaluationAction          Succeeded        4.5s                                                          \n",
-       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[34m╭─\u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34mPipeline Execution Status\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mOverall Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;37mSucceeded\u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mTarget Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;37mSucceeded\u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mElapsed Time \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[37m1885.8s \u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35mPipeline Steps\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mStep Name \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35mStatus \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35mDuration \u001b[0m\u001b[1;35m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mAssociateLineage \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m1.9s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mEvaluateCustomModelMetrics \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m1327.1s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mEvaluateCustomInferenceModel \u001b[0m\u001b[36m \u001b[0m\u001b[33m 
\u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m554.1s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mCreateEvaluationAction \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m4.5s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:53:37] INFO     Final Resource Status: Succeeded                                      execution.py:979\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:53:37]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Final Resource Status: Succeeded \u001b]8;id=524139;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=278480;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#979\u001b\\\u001b[2m979\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Wait for job completion (optional)\n", "# This will poll every 5 seconds for up to 1 hour\n", @@ -1603,512 +315,9 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 17:02:07] INFO     Extracted training job name:                                  show_results_utils.py:52\n",
-       "                             pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955 from                           \n",
-       "                             step: EvaluateCustomModelMetrics (priority: Custom)                                   \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 17:02:07]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted training job name: \u001b]8;id=177834;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=168478;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#52\u001b\\\u001b[2m52\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955 from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m step: EvaluateCustomModelMetrics \u001b[1m(\u001b[0mpriority: Custom\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
╭─────────────────────────────────────────── Result Artifacts Location ───────────────────────────────────────────╮\n",
-       "                                                                                                                 \n",
-       "                                                                                                                 \n",
-       "  📦 Full evaluation artifacts available at:                                                                     \n",
-       "    s3://mufi-test-serverless-smtj/eval/pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955/                  \n",
-       "                                                                                                                 \n",
-       "                                                                                                                 \n",
-       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[34m╭─\u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34mResult Artifacts Location\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m📦 \u001b[0m\u001b[1mFull evaluation artifacts available at:\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m s3://mufi-test-serverless-smtj/eval/pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955/\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     S3 bucket: mufi-test-serverless-smtj, prefix: eval           show_results_utils.py:341\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m S3 bucket: mufi-test-serverless-smtj, prefix: eval \u001b]8;id=453165;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=425984;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#341\u001b\\\u001b[2m341\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Extracted training job name:                                  show_results_utils.py:52\n",
-       "                             pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955 from                           \n",
-       "                             step: EvaluateCustomModelMetrics (priority: Custom)                                   \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted training job name: \u001b]8;id=324161;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=683512;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#52\u001b\\\u001b[2m52\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955 from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m step: EvaluateCustomModelMetrics \u001b[1m(\u001b[0mpriority: Custom\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Searching for bedrock summary in                             show_results_utils.py:361\n",
-       "                             s3://mufi-test-serverless-smtj/eval/pipelines-m318nngjk32f-E                          \n",
-       "                             valuateCustomModelM-lN73ONZ955/output/output/                                         \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for bedrock summary in \u001b]8;id=308182;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=660550;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#361\u001b\\\u001b[2m361\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/pipelines-m318nngjk32f-E\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mvaluateCustomModelM-lN73ONZ955/output/output/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Found bedrock job name: custom-llmaj-eval-m318nngjk32f       show_results_utils.py:377\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found bedrock job name: custom-llmaj-eval-m318nngjk32f \u001b]8;id=705765;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=855376;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#377\u001b\\\u001b[2m377\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Searching for JSONL in                                       show_results_utils.py:387\n",
-       "                             s3://mufi-test-serverless-smtj/eval/custom-llmaj-eval-m318nn                          \n",
-       "                             gjk32f/                                                                               \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for JSONL in \u001b]8;id=236968;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=874421;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#387\u001b\\\u001b[2m387\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/custom-llmaj-eval-m318nn\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mgjk32f/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Found JSONL:                                                 show_results_utils.py:405\n",
-       "                             eval/custom-llmaj-eval-m318nngjk32f/ld39q6di74sg/models/mode                          \n",
-       "                             l/taskTypes/General/datasets/CustomDataset/4a22339b-b5b1-421                          \n",
-       "                             4-9c1e-0c0bf2c71fd6_output.jsonl                                                      \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found JSONL: \u001b]8;id=648967;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=247115;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#405\u001b\\\u001b[2m405\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m eval/custom-llmaj-eval-m318nngjk32f/ld39q6di74sg/models/mode \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m l/taskTypes/General/datasets/CustomDataset/\u001b[93m4a22339b-b5b1-421\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m4-9c1e-0c0bf2c71fd6\u001b[0m_output.jsonl \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Found results file:                                          show_results_utils.py:413\n",
-       "                             eval/custom-llmaj-eval-m318nngjk32f/ld39q6di74sg/models/mode                          \n",
-       "                             l/taskTypes/General/datasets/CustomDataset/4a22339b-b5b1-421                          \n",
-       "                             4-9c1e-0c0bf2c71fd6_output.jsonl                                                      \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found results file: \u001b]8;id=234223;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=249361;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#413\u001b\\\u001b[2m413\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m eval/custom-llmaj-eval-m318nngjk32f/ld39q6di74sg/models/mode \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m l/taskTypes/General/datasets/CustomDataset/\u001b[93m4a22339b-b5b1-421\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m4-9c1e-0c0bf2c71fd6\u001b[0m_output.jsonl \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Loaded 3 evaluation results                                  show_results_utils.py:429\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Loaded \u001b[1;36m3\u001b[0m evaluation results \u001b]8;id=139737;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=460642;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#429\u001b\\\u001b[2m429\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "═══ Evaluation 1 of 3 ═══\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[1;36m═══ Evaluation 1 of 3 ═══\u001b[0m\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Prompt: What is the next number in this series? 1, 2, 4, 8, 16, ?\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1mPrompt:\u001b[0m What is the next number in this series? \u001b[1;36m1\u001b[0m, \u001b[1;36m2\u001b[0m, \u001b[1;36m4\u001b[0m, \u001b[1;36m8\u001b[0m, \u001b[1;36m16\u001b[0m, ?\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Model Response: The next number in the series is 32.\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1mModel Response:\u001b[0m The next number in the series is \u001b[1;36m32\u001b[0m.\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                                               \n",
-       "  Metric                                Score  \n",
-       " ───────────────────────────────────────────── \n",
-       "  Builtin.Completeness                 100.0%  \n",
-       "  Builtin.Faithfulness                 100.0%  \n",
-       "                                               \n",
-       "
\n" - ], - "text/plain": [ - " \n", - " \u001b[1;35m \u001b[0m\u001b[1;35mMetric \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35m Score\u001b[0m\u001b[1;35m \u001b[0m \n", - " ───────────────────────────────────────────── \n", - " \u001b[36m \u001b[0m\u001b[36mBuiltin.Completeness \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[32m 100.0%\u001b[0m\u001b[32m \u001b[0m \n", - " \u001b[36m \u001b[0m\u001b[36mBuiltin.Faithfulness \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[32m 100.0%\u001b[0m\u001b[32m \u001b[0m \n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "═══ Evaluation 2 of 3 ═══\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[1;36m═══ Evaluation 2 of 3 ═══\u001b[0m\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Prompt: What is the symbol that ends the sentence as a question\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1mPrompt:\u001b[0m What is the symbol that ends the sentence as a question\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Model Response: The symbol that ends the sentence as a question is: ?\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1mModel Response:\u001b[0m The symbol that ends the sentence as a question is: ?\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                                               \n",
-       "  Metric                                Score  \n",
-       " ───────────────────────────────────────────── \n",
-       "  Builtin.Completeness                 100.0%  \n",
-       "  Builtin.Faithfulness                 100.0%  \n",
-       "                                               \n",
-       "
\n" - ], - "text/plain": [ - " \n", - " \u001b[1;35m \u001b[0m\u001b[1;35mMetric \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35m Score\u001b[0m\u001b[1;35m \u001b[0m \n", - " ───────────────────────────────────────────── \n", - " \u001b[36m \u001b[0m\u001b[36mBuiltin.Completeness \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[32m 100.0%\u001b[0m\u001b[32m \u001b[0m \n", - " \u001b[36m \u001b[0m\u001b[36mBuiltin.Faithfulness \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[32m 100.0%\u001b[0m\u001b[32m \u001b[0m \n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "═══ Evaluation 3 of 3 ═══\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "\n", - "\u001b[1;36m═══ Evaluation 3 of 3 ═══\u001b[0m\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Prompt: Repeat only the last two words of the following: I ate a hamburger today and it was kind of dry\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1mPrompt:\u001b[0m Repeat only the last two words of the following: I ate a hamburger today and it was kind of dry\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Model Response: I ate a hamburger today and it was kind of dry.\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1mModel Response:\u001b[0m I ate a hamburger today and it was kind of dry.\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                                               \n",
-       "  Metric                                Score  \n",
-       " ───────────────────────────────────────────── \n",
-       "  Builtin.Completeness                   0.0%  \n",
-       "  Builtin.Faithfulness                   0.0%  \n",
-       "                                               \n",
-       "
\n" - ], - "text/plain": [ - " \n", - " \u001b[1;35m \u001b[0m\u001b[1;35mMetric \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35m Score\u001b[0m\u001b[1;35m \u001b[0m \n", - " ───────────────────────────────────────────── \n", - " \u001b[36m \u001b[0m\u001b[36mBuiltin.Completeness \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[32m 0.0%\u001b[0m\u001b[32m \u001b[0m \n", - " \u001b[36m \u001b[0m\u001b[36mBuiltin.Faithfulness \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[32m 0.0%\u001b[0m\u001b[32m \u001b[0m \n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
══════════════════════════════════════════════════════════════════════\n",
-       "
\n" - ], - "text/plain": [ - "══════════════════════════════════════════════════════════════════════\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Showing evaluations 1-3 of 3\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;36mShowing evaluations \u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;36m-\u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;36m of \u001b[0m\u001b[1;36m3\u001b[0m\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
══════════════════════════════════════════════════════════════════════\n",
-       "
\n" - ], - "text/plain": [ - "══════════════════════════════════════════════════════════════════════\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Display results\n", "execution.show_results(limit=10, offset=0, show_explanations=False)" @@ -2127,216 +336,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 17:02:15] WARNING  Could not extract eval_type from ARN:                                 execution.py:146\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation                 \n",
-       "                             -llmasjudge/execution/4hr7446yft1d                                                    \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 17:02:15]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m Could not extract eval_type from ARN: \u001b]8;id=315627;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=953607;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#146\u001b\\\u001b[2m146\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -llmasjudge/execution/4hr7446yft1d \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Extracted s3_output_path from training job                            execution.py:367\n",
-       "                             pipelines-4hr7446yft1d-EvaluateCustomModelM-qePWbkcMxz:                               \n",
-       "                             s3://mufi-test-serverless-smtj/eval                                                   \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted s3_output_path from training job \u001b]8;id=739992;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=203397;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#367\u001b\\\u001b[2m367\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-4hr7446yft1d-EvaluateCustomModelM-qePWbkcMxz: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/\u001b[0m\u001b[38;2;225;0;225meval\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  Could not extract eval_type from ARN:                                 execution.py:146\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation                 \n",
-       "                             -llmasjudge                                                                           \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m Could not extract eval_type from ARN: \u001b]8;id=550335;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=858100;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#146\u001b\\\u001b[2m146\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -llmasjudge \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    WARNING  Could not extract eval_type from ARN:                                 execution.py:146\n",
-       "                             arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation                 \n",
-       "                             -llmasjudge/execution/4hr7446yft1d                                                    \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m Could not extract eval_type from ARN: \u001b]8;id=379628;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=725705;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#146\u001b\\\u001b[2m146\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -llmasjudge/execution/4hr7446yft1d \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
PipelineExecutionStatus(\n",
-       "overall_status='Succeeded',\n",
-       "step_details=[\n",
-       "│   │   StepDetail(\n",
-       "│   │   │   name='AssociateLineage',\n",
-       "│   │   │   status='Succeeded',\n",
-       "│   │   │   start_time='2025-11-19T15:45:57.889000-08:00',\n",
-       "│   │   │   end_time='2025-11-19T15:45:59.266000-08:00',\n",
-       "│   │   │   display_name=None,\n",
-       "│   │   │   failure_reason=None\n",
-       "│   │   ),\n",
-       "│   │   StepDetail(\n",
-       "│   │   │   name='EvaluateCustomModelMetrics',\n",
-       "│   │   │   status='Succeeded',\n",
-       "│   │   │   start_time='2025-11-19T15:27:55.641000-08:00',\n",
-       "│   │   │   end_time='2025-11-19T15:45:56.749000-08:00',\n",
-       "│   │   │   display_name=None,\n",
-       "│   │   │   failure_reason=None\n",
-       "│   │   ),\n",
-       "│   │   StepDetail(\n",
-       "│   │   │   name='EvaluateCustomInferenceModel',\n",
-       "│   │   │   status='Succeeded',\n",
-       "│   │   │   start_time='2025-11-19T15:18:07.804000-08:00',\n",
-       "│   │   │   end_time='2025-11-19T15:27:54.474000-08:00',\n",
-       "│   │   │   display_name=None,\n",
-       "│   │   │   failure_reason=None\n",
-       "│   │   ),\n",
-       "│   │   StepDetail(\n",
-       "│   │   │   name='CreateEvaluationAction',\n",
-       "│   │   │   status='Succeeded',\n",
-       "│   │   │   start_time='2025-11-19T15:18:05.550000-08:00',\n",
-       "│   │   │   end_time='2025-11-19T15:18:07.332000-08:00',\n",
-       "│   │   │   display_name=None,\n",
-       "│   │   │   failure_reason=None\n",
-       "│   │   )\n",
-       "],\n",
-       "failure_reason=None\n",
-       ")\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'AssociateLineage'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:45:57.889000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:45:59.266000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'EvaluateCustomModelMetrics'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:27:55.641000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:45:56.749000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ 
\u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'EvaluateCustomInferenceModel'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:18:07.804000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:27:54.474000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'CreateEvaluationAction'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:18:05.550000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:18:07.332000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[2;32m│ 
\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n",
-       " in <module>:17                                                                                   \n",
-       "                                                                                                  \n",
-       "   14 )                                                                                           \n",
-       "   15 pprint(existing_execution.status)                                                           \n",
-       "   16                                                                                             \n",
-       " 17 existing_execution.show_results(limit=5, offset=0, show_explanations=False)                 \n",
-       "   18                                                                                             \n",
-       "                                                                                                  \n",
-       " /Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/pydantic/main \n",
-       " .py:1026 in __getattr__                                                                          \n",
-       "                                                                                                  \n",
-       "   1023 │   │   │   │   │   │   return super().__getattribute__(item)  # Raises AttributeError i  \n",
-       "   1024 │   │   │   │   │   else:                                                                 \n",
-       "   1025 │   │   │   │   │   │   # this is the current error                                       \n",
-       " 1026 │   │   │   │   │   │   raise AttributeError(f'{type(self).__name__!r} object has no att  \n",
-       "   1027 │   │                                                                                     \n",
-       "   1028 │   │   def __setattr__(self, name: str, value: Any) -> None:                             \n",
-       "   1029 │   │   │   if (setattr_handler := self.__pydantic_setattr_handlers__.get(name)) is not   \n",
-       "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-       "AttributeError: 'EvaluationPipelineExecution' object has no attribute 'show_results'\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[38;2;255;0;0m╭─\u001b[0m\u001b[38;2;255;0;0m──────────────────────────────\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[1;38;2;255;0;0mTraceback \u001b[0m\u001b[1;2;38;2;255;0;0m(most recent call last)\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[38;2;255;0;0m───────────────────────────────\u001b[0m\u001b[38;2;255;0;0m─╮\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m in \u001b[92m\u001b[0m:\u001b[94m17\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m14 \u001b[0m) \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m15 \u001b[0mpprint(existing_execution.status) \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m16 \u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[31m❱ \u001b[0m17 \u001b[1;4mexisting_execution.show_results\u001b[0m(limit=\u001b[94m5\u001b[0m, offset=\u001b[94m0\u001b[0m, show_explanations=\u001b[94mFalse\u001b[0m) \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m18 \u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2;33m/Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/pydantic/\u001b[0m\u001b[1;33mmain\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[1;33m.py\u001b[0m:\u001b[94m1026\u001b[0m in \u001b[92m__getattr__\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m1023 \u001b[0m\u001b[2m│ │ │ │ │ │ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96msuper\u001b[0m().\u001b[92m__getattribute__\u001b[0m(item) \u001b[2m# Raises AttributeError i\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m 
\u001b[2m1024 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m1025 \u001b[0m\u001b[2m│ │ │ │ │ │ \u001b[0m\u001b[2m# this is the current error\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[31m❱ \u001b[0m1026 \u001b[2m│ │ │ │ │ │ \u001b[0m\u001b[1;4;94mraise\u001b[0m\u001b[1;4m \u001b[0m\u001b[1;4;96mAttributeError\u001b[0m\u001b[1;4m(\u001b[0m\u001b[1;4;33mf\u001b[0m\u001b[1;4;33m'\u001b[0m\u001b[1;4;33m{\u001b[0m\u001b[1;4;96mtype\u001b[0m\u001b[1;4m(\u001b[0m\u001b[1;4;96mself\u001b[0m\u001b[1;4m).\u001b[0m\u001b[1;4;91m__name__\u001b[0m\u001b[1;4;33m!r}\u001b[0m\u001b[1;4;33m object has no att\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m1027 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m1028 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mdef\u001b[0m\u001b[90m \u001b[0m\u001b[92m__setattr__\u001b[0m(\u001b[96mself\u001b[0m, name: \u001b[96mstr\u001b[0m, value: Any) -> \u001b[94mNone\u001b[0m: \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m \u001b[2m1029 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mif\u001b[0m (setattr_handler := \u001b[96mself\u001b[0m.__pydantic_setattr_handlers__.get(name)) \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[38;2;255;0;0m│\u001b[0m\n", - "\u001b[38;2;255;0;0m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n", - "\u001b[1;91mAttributeError: \u001b[0m\u001b[38;2;0;135;0m'EvaluationPipelineExecution'\u001b[0m object has no attribute \u001b[38;2;0;135;0m'show_results'\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Get an existing job by ARN\n", "# Replace with your actual pipeline execution ARN\n", @@ -2365,36 +365,9 @@ }, { "cell_type": "code", - 
"execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 17:02:21] INFO     Extracted s3_output_path from training job                            execution.py:367\n",
-       "                             pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955:                               \n",
-       "                             s3://mufi-test-serverless-smtj/eval                                                   \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 17:02:21]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted s3_output_path from training job \u001b]8;id=802368;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=75226;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#367\u001b\\\u001b[2m367\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/\u001b[0m\u001b[38;2;225;0;225meval\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found 2 LLM-as-Judge evaluation jobs\n", - " - m318nngjk32f: Succeeded\n", - " - 2m5hczli7vdp: Failed\n" - ] - } - ], + "outputs": [], "source": [ "from sagemaker.train.evaluate import LLMAsJudgeEvaluator\n", "\n", diff --git a/v3-examples/inference-examples/optimize-example.ipynb b/v3-examples/inference-examples/optimize-example.ipynb index 4ad4f3b513..dfb28af984 100644 --- a/v3-examples/inference-examples/optimize-example.ipynb +++ b/v3-examples/inference-examples/optimize-example.ipynb @@ -54,8 +54,8 @@ "MODEL_ID = \"meta-textgeneration-llama-3-8b-instruct\"\n", "MODEL_NAME_PREFIX = \"jumpstart-optimize-example\"\n", "ENDPOINT_NAME_PREFIX = \"jumpstart-optimize-example-endpoint\"\n", - "AWS_ACCOUNT_ID = \"593793038179\"\n", - "AWS_REGION = \"us-east-2\"\n", + "AWS_ACCOUNT_ID = Session.account_id()\n", + "AWS_REGION = Session.boto_region_name\n", "\n", "# Generate unique identifiers\n", "unique_id = str(uuid.uuid4())[:8]\n", diff --git a/v3-examples/inference-examples/train-inference-e2e-example.ipynb 
b/v3-examples/inference-examples/train-inference-e2e-example.ipynb index 7a9d45e476..ee9b0ac0c7 100644 --- a/v3-examples/inference-examples/train-inference-e2e-example.ipynb +++ b/v3-examples/inference-examples/train-inference-e2e-example.ipynb @@ -61,7 +61,7 @@ "TRAINING_JOB_PREFIX = \"e2e-v3-pytorch\"\n", "\n", "# AWS Configuration\n", - "AWS_REGION = \"us-west-2\"\n", + "AWS_REGION = Session().boto_region_name\n", "PYTORCH_TRAINING_IMAGE = f\"763104351884.dkr.ecr.{AWS_REGION}.amazonaws.com/pytorch-training:1.13.1-cpu-py39\"\n", "\n", "# Generate unique identifiers\n", diff --git a/v3-examples/ml-ops-examples/v3-mlflow-train-inference-e2e-example.ipynb b/v3-examples/ml-ops-examples/v3-mlflow-train-inference-e2e-example.ipynb index 0acd6fdf54..80435c9325 100644 --- a/v3-examples/ml-ops-examples/v3-mlflow-train-inference-e2e-example.ipynb +++ b/v3-examples/ml-ops-examples/v3-mlflow-train-inference-e2e-example.ipynb @@ -34,8 +34,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Install from local SDK for development (includes fixes for MLflow path resolution issues)\n", - "%pip install -e ../../sagemaker-core -e ../../sagemaker-train -e ../../sagemaker-serve -e ../../sagemaker-mlops -e ../../. 
\"mlflow==3.4.0\" --upgrade" + "# Install fix for MLflow path resolution issues\n", + "%pip install mlflow==3.4.0" ] }, { @@ -62,6 +62,7 @@ "source": [ "import uuid\n", "from sagemaker.core import image_uris\n", + "from sagemaker.core.helper.session_helper import Session\n", "\n", "# =============================================================================\n", "# MLflow Configuration - UPDATE THIS WITH YOUR TRACKING SERVER ARN\n", @@ -70,7 +71,7 @@ "MLFLOW_TRACKING_ARN = \"XXXXX\"\n", "\n", "# AWS Configuration\n", - "AWS_REGION = \"us-east-1\"\n", + "AWS_REGION = Session().boto_region_name\n", "\n", "# Get PyTorch training image dynamically\n", "PYTORCH_TRAINING_IMAGE = image_uris.retrieve(\n", @@ -556,7 +557,7 @@ ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "py3.10.14", "language": "python", "name": "python3" }, @@ -570,7 +571,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.14" + "version": "3.10.14" } }, "nbformat": 4, diff --git a/v3-examples/ml-ops-examples/v3-model-registry-example/v3-model-registry-example.ipynb b/v3-examples/ml-ops-examples/v3-model-registry-example/v3-model-registry-example.ipynb index d307261bbf..6dc3427047 100644 --- a/v3-examples/ml-ops-examples/v3-model-registry-example/v3-model-registry-example.ipynb +++ b/v3-examples/ml-ops-examples/v3-model-registry-example/v3-model-registry-example.ipynb @@ -23,15 +23,17 @@ "from sagemaker.core import image_uris\n", "import boto3\n", "\n", + "sagemaker_session = Session()\n", + "role = get_execution_role()\n", + "region = sagemaker_session.boto_region_name\n", + "\n", "image_uri = image_uris.retrieve(\n", " framework=\"xgboost\",\n", - " region=\"us-east-1\",\n", + " region=region,\n", " version=\"1.0-1\",\n", " py_version=\"py3\",\n", " instance_type=\"ml.m5.xlarge\",\n", - ")\n", - "sagemaker_session = Session()\n", - "role = get_execution_role()" + ")" ] }, { @@ -181,7 +183,7 @@ "# approve the version 
before creating model\n", "\n", "# There is a gap that API response for a versioned model package doesn't include model_package_name\n", - "sagemaker_client = boto3.client('sagemaker', region_name='us-east-1')\n", + "sagemaker_client = boto3.client('sagemaker', region_name=region)\n", "sagemaker_client.update_model_package(\n", " ModelPackageArn=registered_model_package_arn,\n", " ModelApprovalStatus=\"Approved\"\n", diff --git a/v3-examples/model-customization-examples/sm-studio-nova-training-job-sample-notebook.ipynb b/v3-examples/model-customization-examples/sm-studio-nova-training-job-sample-notebook.ipynb index 20c51e562e..4e49266323 100644 --- a/v3-examples/model-customization-examples/sm-studio-nova-training-job-sample-notebook.ipynb +++ b/v3-examples/model-customization-examples/sm-studio-nova-training-job-sample-notebook.ipynb @@ -45,16 +45,6 @@ "## Setup and Dependencies" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "84cf410f", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install --upgrade sagemaker --quiet # restart the kernel after running this cell" - ] - }, { "cell_type": "code", "execution_count": null, diff --git a/v3-examples/sagemaker_v3_setup.ipynb b/v3-examples/sagemaker_v3_setup.ipynb new file mode 100644 index 0000000000..6e25fb5acb --- /dev/null +++ b/v3-examples/sagemaker_v3_setup.ipynb @@ -0,0 +1,183 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# SageMaker Python SDK v3+ Setup\n", + "\n", + "This notebook helps you upgrade to SageMaker Python SDK v3+ and verify the installation.\n", + "\n", + "**⚠️ Important:** After running this notebook, restart your kernel before using SageMaker v3." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1: Check Current Version" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip show sagemaker | grep Version" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2: Install/Upgrade\n", + "\n", + "Choose **ONE** of the following methods:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Method 1: Standard Upgrade (Try this first)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --upgrade sagemaker" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Method 2: Force Reinstall (If Method 1 fails)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --force-reinstall --no-cache-dir sagemaker" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Method 3: Clean Uninstall + Reinstall (If residual files exist)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip uninstall -y sagemaker sagemaker-core sagemaker-train sagemaker-serve sagemaker-mlops\n", + "%pip cache purge\n", + "%pip install sagemaker" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 3: Restart Kernel\n", + "\n", + "**⚠️ REQUIRED:** Click **Kernel → Restart Kernel** in the menu above, then continue to Step 4." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 4: Verify Installation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip show sagemaker | grep Version" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 5: Verify Core Components" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " from sagemaker.core.helper.session_helper import Session\n", + " from sagemaker.train import ModelTrainer\n", + " from sagemaker.serve import ModelBuilder\n", + " from sagemaker.mlops.workflow.pipeline import Pipeline\n", + " print(\"✓ Core v3 modules imported successfully\")\n", + " \n", + " session = Session()\n", + " print(f\"✓ Session created - Region: {session.boto_region_name}\")\n", + " print(f\"✓ Default bucket: {session.default_bucket()}\")\n", + " \n", + "except ImportError as e:\n", + " print(f\"✗ Import failed: {e}\")\n", + " print(\"Try Method 3 (Clean Uninstall + Reinstall)\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Troubleshooting\n", + "\n", + "### Version still shows v2.x after upgrade\n", + "1. Restart kernel (Kernel → Restart Kernel)\n", + "2. Use Method 3 (Clean Uninstall + Reinstall)\n", + "\n", + "### Import errors after upgrade\n", + "Clear Python cache and restart kernel:\n", + "```python\n", + "!find . -type d -name __pycache__ -exec rm -r {} +\n", + "!find . 
-type f -name '*.pyc' -delete\n", + "```" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.1" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/v3-examples/training-examples/distributed-local-training-example.ipynb b/v3-examples/training-examples/distributed-local-training-example.ipynb index 3c607979a4..ece9f5dbd8 100644 --- a/v3-examples/training-examples/distributed-local-training-example.ipynb +++ b/v3-examples/training-examples/distributed-local-training-example.ipynb @@ -6,7 +6,7 @@ "source": [ "# SageMaker V3 Distributed Local Training Example\n", "\n", - "This notebook demonstrates how to run distributed training locally using SageMaker V3 ModelTrainer with multiple Docker containers." + "This notebook demonstrates how to run distributed training locally using SageMaker V3 ModelTrainer with multiple Docker containers. Note: This notebook will not run in SageMaker Studio. " ] }, { diff --git a/v3-examples/training-examples/local-training-example.ipynb b/v3-examples/training-examples/local-training-example.ipynb index 0c2f09ccfe..a279378e49 100644 --- a/v3-examples/training-examples/local-training-example.ipynb +++ b/v3-examples/training-examples/local-training-example.ipynb @@ -6,7 +6,8 @@ "source": [ "# SageMaker V3 Local Training Example\n", "\n", - "This notebook demonstrates how to use SageMaker V3 ModelTrainer in Local Container mode for testing training jobs in Docker containers locally." + "This notebook demonstrates how to use SageMaker V3 ModelTrainer in Local Container mode for testing training jobs in Docker containers locally. \n", + "Note: This notebook will not run in SageMaker Studio. " ] }, {