Skip to content

Commit 47ed5a9

Browse files
author
jetstream authors
committed
Merge pull request #265 from AI-Hypercomputer:vij-upload-manifest
PiperOrigin-RevId: 753744020
2 parents 63c0d3e + ae77b83 commit 47ed5a9

File tree

3 files changed

+41
-6
lines changed

3 files changed

+41
-6
lines changed

.github/workflows/run_maxtext_jetstream_tests.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,9 @@ jobs:
191191
- name: Log message if dependent job succeeded
192192
if: ${{ ! (failure() && github.event.pull_request == null) }}
193193
run: echo "Conditions for creating/updating issue not met. Skipping."
194+
- name: Upload manifest to gcs
195+
if: ${{ ! failure() }}
196+
run: gsutil cp ${{ env.BUILD_MANIFEST_DIR }}/${{ needs.build_stable_stack.outputs.manifest_name }} gs://jetstream-inference-stable-stack-artifacts/manifest-files/
194197
- name: Send email
195198
uses: dawidd6/[email protected]
196199
with:
@@ -202,5 +205,5 @@ jobs:
202205
203206
from: JetStream Runs
204207
secure: true
205-
attachments: ${{ env.BUILD_MANIFEST_DIR }}/${{ needs.build_stable_stack.outputs.manifest_name }},${{ env.BENCHMARK_REPORT_DIR }}/moe_8x7b.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x22b.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x22b_long_context_8k_prefill.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x7b_jetstream.txt,${{ env.BENCHMARK_REPORT_DIR }}/llama_70b_jetstream.txt
208+
attachments: ${{ env.BUILD_MANIFEST_DIR }}/${{ needs.build_stable_stack.outputs.manifest_name }},${{ env.BENCHMARK_REPORT_DIR }}/moe_8x7b.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x22b.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x22b_long_context_8k_prefill.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x7b_jetstream.txt,${{ env.BENCHMARK_REPORT_DIR }}/llama_70b_jetstream.txt,${{ env.OUTPUT_DIR }}/golden-numbers.txt,${{ env.OUTPUT_DIR }}/result_comparison.txt
206209
body: workflow for ${{github.repository}} completed successfully!

.github/workflows/test_llama_benchmarks.sh

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,12 @@ sleep 800
2626
cd ..
2727

2828
python JetStream/benchmarks/benchmark_serving.py --tokenizer maxtext/assets/tokenizer.llama2 --save-result --save-request-outputs --request-outputs-file-path outputs.json --num-prompts 1200 --max-output-length 1024 --dataset openorca --run-eval True > ${OUTPUT_DIR}/llama_70b_jetstream.txt
29-
tail -n25 ${OUTPUT_DIR}/llama_70b_jetstream.txt > ${OUTPUT_DIR}/llama_70b_jetstream.tmp && mv ${OUTPUT_DIR}/llama_70b_jetstream.tmp ${OUTPUT_DIR}/llama_70b_jetstream.txt
29+
#tail -n25 ${OUTPUT_DIR}/llama_70b_jetstream.txt > ${OUTPUT_DIR}/llama_70b_jetstream.tmp && mv ${OUTPUT_DIR}/llama_70b_jetstream.tmp ${OUTPUT_DIR}/llama_70b_jetstream.txt
3030

3131
# kill Jetstream server
32-
kill -9 %%
32+
kill -9 %%
33+
tail -n25 ${OUTPUT_DIR}/llama_70b_jetstream.txt > ${OUTPUT_DIR}/llama_70b_jetstream.tmp
34+
echo "\nLlama 70b Maxtext Jetstream Run throughput and accuracy" >> ${OUTPUT_DIR}/result_comparison.txt
35+
grep "throughput" ${OUTPUT_DIR}/llama_70b_jetstream.tmp >> ${OUTPUT_DIR}/result_comparison.txt
36+
grep "rouge1" ${OUTPUT_DIR}/llama_70b_jetstream.tmp >> ${OUTPUT_DIR}/result_comparison.txt
37+
mv ${OUTPUT_DIR}/llama_70b_jetstream.tmp ${OUTPUT_DIR}/llama_70b_jetstream.txt

.github/workflows/test_moe_benchmarks.sh

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,25 @@ cd maxtext
99

1010
# moe 8x7b microbenchmark
1111
LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.inference_microbenchmark MaxText/configs/inference.yml tokenizer_path=assets/tokenizer.mistral-v1 max_prefill_predict_length=1024 max_target_length=2048 model_name=mixtral-8x7b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 ici_context_autoregressive_parallelism=8 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=8 megablox=False quantization=int8 quantize_kvcache=False checkpoint_is_quantized=True load_parameters_path=gs://jetstream-runner/8-7B-int8 capacity_factor=1 attention=dot_product model_call_mode=inference sparse_matmul=False weight_dtype=bfloat16 > ${OUTPUT_DIR}/moe_8x7b.txt
12-
tail -n5 ${OUTPUT_DIR}/moe_8x7b.txt > ${OUTPUT_DIR}/moe_8x7b.tmp && mv ${OUTPUT_DIR}/moe_8x7b.tmp ${OUTPUT_DIR}/moe_8x7b.txt
12+
tail -n5 ${OUTPUT_DIR}/moe_8x7b.txt > ${OUTPUT_DIR}/moe_8x7b.tmp
13+
echo "\n" >> ${OUTPUT_DIR}/result_comparison.txt
14+
echo "\n8x7b microbenchmark prefill decode latencies" >> ${OUTPUT_DIR}/result_comparison.txt
15+
grep "PREFILL" ${OUTPUT_DIR}/moe_8x7b.tmp >> ${OUTPUT_DIR}/result_comparison.txt
16+
grep "DECODE" ${OUTPUT_DIR}/moe_8x7b.tmp >> ${OUTPUT_DIR}/result_comparison.txt
17+
mv ${OUTPUT_DIR}/moe_8x7b.tmp ${OUTPUT_DIR}/moe_8x7b.txt
18+
#tail -n5 ${OUTPUT_DIR}/moe_8x7b.txt > ${OUTPUT_DIR}/moe_8x7b.tmp && mv ${OUTPUT_DIR}/moe_8x7b.tmp ${OUTPUT_DIR}/moe_8x7b.txt
1319

1420
# moe 8x22B microbenchmark
1521
LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.inference_microbenchmark MaxText/configs/inference.yml load_parameters_path=gs://jetstream-runner/8-22B-int8 max_prefill_predict_length=1024 max_target_length=2048 model_name=mixtral-8x22b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 ici_context_autoregressive_parallelism=8 scan_layers=false per_device_batch_size=24 attention=dot_product megablox=False quantization=int8 checkpoint_is_quantized=True quantize_kvcache=True capacity_factor=1 tokenizer_path=assets/tokenizer.mistral-v3 inference_microbenchmark_prefill_lengths="128,1024" sparse_matmul=False model_call_mode=inference > ${OUTPUT_DIR}/moe_8x22b.txt
16-
tail -n5 ${OUTPUT_DIR}/moe_8x22b.txt > ${OUTPUT_DIR}/moe_8x22b.tmp && mv ${OUTPUT_DIR}/moe_8x22b.tmp ${OUTPUT_DIR}/moe_8x22b.txt
22+
23+
tail -n5 ${OUTPUT_DIR}/moe_8x22b.txt > ${OUTPUT_DIR}/moe_8x22b.tmp
24+
echo "\n" >> ${OUTPUT_DIR}/result_comparison.txt
25+
echo "\n8x22b microbenchmark prefill decode latencies" >> ${OUTPUT_DIR}/result_comparison.txt
26+
grep "PREFILL" ${OUTPUT_DIR}/moe_8x22b.tmp >> ${OUTPUT_DIR}/result_comparison.txt
27+
grep "DECODE" ${OUTPUT_DIR}/moe_8x22b.tmp >> ${OUTPUT_DIR}/result_comparison.txt
28+
mv ${OUTPUT_DIR}/moe_8x22b.tmp ${OUTPUT_DIR}/moe_8x22b.txt
29+
30+
#tail -n5 ${OUTPUT_DIR}/moe_8x22b.txt > ${OUTPUT_DIR}/moe_8x22b.tmp && mv ${OUTPUT_DIR}/moe_8x22b.tmp ${OUTPUT_DIR}/moe_8x22b.txt
1731

1832
# moe 8x22B 8k context length chunked prefill with 2k prefill chunk size
1933
LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.benchmark_chunked_prefill MaxText/configs/inference.yml load_parameters_path=gs://jetstream-runner/8-22B-int8 max_prefill_predict_length=8192 max_target_length=9000 model_name=mixtral-8x22b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 scan_layers=false per_device_batch_size=24 attention=dot_product megablox=False quantization=int8 checkpoint_is_quantized=True quantize_kvcache=False capacity_factor=1 tokenizer_path=assets/tokenizer.mistral-v3 inference_microbenchmark_prefill_lengths="8192" sparse_matmul=False model_call_mode=inference ici_context_autoregressive_parallelism=8 use_chunked_prefill=True prefill_chunk_size=2048 > ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.txt
@@ -29,7 +43,20 @@ sleep 600
2943
cd ..
3044

3145
python JetStream/benchmarks/benchmark_serving.py --tokenizer maxtext/assets/tokenizer.mistral-v1 --save-result --save-request-outputs --request-outputs-file-path outputs.json --num-prompts 1200 --max-output-length 1024 --dataset openorca --run-eval True > ${OUTPUT_DIR}/moe_8x7b_jetstream.txt
32-
tail -n25 ${OUTPUT_DIR}/moe_8x7b_jetstream.txt > ${OUTPUT_DIR}/moe_8x7b_jetstream.tmp && mv ${OUTPUT_DIR}/moe_8x7b_jetstream.tmp ${OUTPUT_DIR}/moe_8x7b_jetstream.txt
46+
# tail -n25 ${OUTPUT_DIR}/moe_8x7b_jetstream.txt > ${OUTPUT_DIR}/moe_8x7b_jetstream.tmp && mv ${OUTPUT_DIR}/moe_8x7b_jetstream.tmp ${OUTPUT_DIR}/moe_8x7b_jetstream.txt
3347

3448
# kill Jetstream server
3549
kill -9 %%
50+
51+
tail -n25 ${OUTPUT_DIR}/moe_8x7b_jetstream.txt > ${OUTPUT_DIR}/moe_8x7b_jetstream.tmp
52+
echo "\n" >> ${OUTPUT_DIR}/result_comparison.txt
53+
54+
echo "\n8x7b Maxtext Jetstream Run throughput and accuracy for Mixtral 8x7B" >> ${OUTPUT_DIR}/result_comparison.txt
55+
grep "throughput" ${OUTPUT_DIR}/moe_8x7b_jetstream.tmp >> ${OUTPUT_DIR}/result_comparison.txt
56+
grep "rouge1" ${OUTPUT_DIR}/moe_8x7b_jetstream.tmp >> ${OUTPUT_DIR}/result_comparison.txt
57+
58+
mv ${OUTPUT_DIR}/moe_8x7b_jetstream.tmp ${OUTPUT_DIR}/moe_8x7b_jetstream.txt
59+
60+
61+
# download golden numbers from gcs
62+
gsutil cp gs://jetstream-inference-stable-stack-artifacts/golden-numbers/golden-numbers.txt ${OUTPUT_DIR}/

0 commit comments

Comments
 (0)