Skip to content

Commit ff27918

Browse files
committed
Test message re-ordering fixes
1 parent a60084d commit ff27918

File tree

7 files changed

+108
-42
lines changed

7 files changed

+108
-42
lines changed

.github/workflows/build-and-push-tutorial-agent.yml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -190,6 +190,10 @@ jobs:
190190
SHOULD_PUSH=false
191191
VERSION_TAG="${{ github.sha }}"
192192
echo "🔍 Building agent for validation: ${{ matrix.agent_path }}"
193+
# Set full image name for validation step (local build)
194+
echo "FULL_IMAGE=${REGISTRY}/${REPOSITORY_NAME}:${VERSION_TAG}" >> $GITHUB_ENV
195+
# Skip image validation for PRs since Buildx doesn't load multi-platform images locally
196+
echo "SKIP_VALIDATION=true" >> $GITHUB_ENV
193197
fi
194198
195199
# Always build locally first (without push)
@@ -203,6 +207,7 @@ jobs:
203207
echo "SHOULD_PUSH=${SHOULD_PUSH}" >> $GITHUB_ENV
204208
205209
- name: Validate agent image
210+
if: env.SKIP_VALIDATION != 'true'
206211
run: |
207212
set -e
208213

examples/tutorials/00_sync/010_multiturn/project/acp.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,7 @@ async def handle_message_send(
6767
#########################################################
6868

6969
task_messages = await adk.messages.list(task_id=params.task.id)
70+
task_messages = list(reversed(task_messages)) # API returns newest first, reverse to chronological order
7071

7172
#########################################################
7273
# 3. Run the agent with OpenAI Agents SDK

examples/tutorials/00_sync/010_multiturn/tests/test_agent.py

Lines changed: 33 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -57,18 +57,27 @@ class TestNonStreamingMessages:
5757
"""Test non-streaming message sending."""
5858

5959
def test_send_message(self, client: Agentex, agent_name: str, agent_id: str):
60+
"""
61+
Test message ordering by sending messages about distinct topics.
62+
63+
This validates that the agent receives messages in chronological order.
64+
If messages are reversed (newest first), the agent would respond about
65+
the wrong topic.
66+
"""
6067
task_response = client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
6168
task = task_response.result
6269

6370
assert task is not None
6471

65-
messages = [
66-
"Hello, can you tell me a litle bit about tennis? I want to you make sure you use the word 'tennis' in each response.",
67-
"Pick one of the things you just mentioned, and dive deeper into it.",
68-
"Can you now output a summary of this conversation",
72+
# Each message asks about a distinct topic with a required keyword in response
73+
# This validates message ordering: if order is wrong, agent responds about wrong topic
74+
messages_and_expected_keywords = [
75+
("Tell me about tennis. You must include the word 'tennis' in your response.", "tennis"),
76+
("Now tell me about basketball. You must include the word 'basketball' in your response. Do not mention tennis.", "basketball"),
77+
("Now tell me about soccer. You must include the word 'soccer' in your response. Do not mention tennis or basketball.", "soccer"),
6978
]
7079

71-
for i, msg in enumerate(messages):
80+
for i, (msg, expected_keyword) in enumerate(messages_and_expected_keywords):
7281
response = client.agents.send_message(
7382
agent_name=agent_name,
7483
params=ParamsSendMessageRequest(
@@ -87,7 +96,8 @@ def test_send_message(self, client: Agentex, agent_name: str, agent_id: str):
8796
content = message.content
8897
assert content is not None
8998
assert isinstance(content, TextContent) and isinstance(content.content, str)
90-
validate_text_in_string("tennis", content.content)
99+
# Validate response contains the expected keyword for THIS message's topic
100+
validate_text_in_string(expected_keyword, content.content.lower())
91101

92102
states = client.states.list(agent_id=agent_id, task_id=task.id)
93103
assert len(states) == 1
@@ -106,20 +116,29 @@ class TestStreamingMessages:
106116
"""Test streaming message sending."""
107117

108118
def test_stream_message(self, client: Agentex, agent_name: str, agent_id: str):
109-
"""Test streaming messages in a multi-turn conversation."""
119+
"""
120+
Test message ordering with streaming by sending messages about distinct topics.
121+
122+
This validates that the agent receives messages in chronological order.
123+
If messages are reversed (newest first), the agent would respond about
124+
the wrong topic.
125+
"""
110126

111127
# create a task for this specific conversation
112128
task_response = client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
113129
task = task_response.result
114130

115131
assert task is not None
116-
messages = [
117-
"Hello, can you tell me a little bit about tennis? I want you to make sure you use the word 'tennis' in each response.",
118-
"Pick one of the things you just mentioned, and dive deeper into it.",
119-
"Can you now output a summary of this conversation",
132+
133+
# Each message asks about a distinct topic with a required keyword in response
134+
# This validates message ordering: if order is wrong, agent responds about wrong topic
135+
messages_and_expected_keywords = [
136+
("Tell me about tennis. You must include the word 'tennis' in your response.", "tennis"),
137+
("Now tell me about basketball. You must include the word 'basketball' in your response. Do not mention tennis.", "basketball"),
138+
("Now tell me about soccer. You must include the word 'soccer' in your response. Do not mention tennis or basketball.", "soccer"),
120139
]
121140

122-
for i, msg in enumerate(messages):
141+
for i, (msg, expected_keyword) in enumerate(messages_and_expected_keywords):
123142
stream = client.agents.send_message_stream(
124143
agent_name=agent_name,
125144
params=ParamsSendMessageRequest(
@@ -136,10 +155,9 @@ def test_stream_message(self, client: Agentex, agent_name: str, agent_id: str):
136155
aggregated_content, chunks = collect_streaming_response(stream)
137156

138157
assert len(chunks) == 1
139-
# Get the actual content (prefer full_content if available, otherwise use aggregated)
140158

141-
# Validate that "tennis" appears in the response because that is what our model does
142-
validate_text_in_string("tennis", aggregated_content)
159+
# Validate response contains the expected keyword for THIS message's topic
160+
validate_text_in_string(expected_keyword, aggregated_content.lower())
143161

144162
states = client.states.list(task_id=task.id)
145163
assert len(states) == 1

examples/tutorials/00_sync/020_streaming/project/acp.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,7 @@ async def handle_message_send(
6868
state = StateModel.model_validate(task_state.state)
6969

7070
task_messages = await adk.messages.list(task_id=params.task.id)
71-
71+
task_messages = list(reversed(task_messages)) # API returns newest first, reverse to chronological order
7272

7373
# Initialize the provider and run config to allow for tracing
7474
provider = SyncStreamingProvider(

examples/tutorials/00_sync/020_streaming/tests/test_agent.py

Lines changed: 35 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
import os
2020

2121
import pytest
22-
from test_utils.sync import collect_streaming_response
22+
from test_utils.sync import validate_text_in_string, collect_streaming_response
2323

2424
from agentex import Agentex
2525
from agentex.types import TextContent, TextContentParam
@@ -57,19 +57,27 @@ class TestNonStreamingMessages:
5757
"""Test non-streaming message sending."""
5858

5959
def test_send_message(self, client: Agentex, agent_name: str, agent_id: str):
60-
"""Test sending a message and receiving a response."""
60+
"""
61+
Test message ordering by sending messages about distinct topics.
62+
63+
This validates that the agent receives messages in chronological order.
64+
If messages are reversed (newest first), the agent would respond about
65+
the wrong topic.
66+
"""
6167
task_response = client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
6268
task = task_response.result
6369

6470
assert task is not None
6571

66-
messages = [
67-
"Hello, can you tell me a little bit about tennis? I want you to make sure you use the word 'tennis' in each response.",
68-
"Pick one of the things you just mentioned, and dive deeper into it.",
69-
"Can you now output a summary of this conversation",
72+
# Each message asks about a distinct topic with a required keyword in response
73+
# This validates message ordering: if order is wrong, agent responds about wrong topic
74+
messages_and_expected_keywords = [
75+
("Tell me about tennis. You must include the word 'tennis' in your response.", "tennis"),
76+
("Now tell me about basketball. You must include the word 'basketball' in your response. Do not mention tennis.", "basketball"),
77+
("Now tell me about soccer. You must include the word 'soccer' in your response. Do not mention tennis or basketball.", "soccer"),
7078
]
7179

72-
for i, msg in enumerate(messages):
80+
for i, (msg, expected_keyword) in enumerate(messages_and_expected_keywords):
7381
response = client.agents.send_message(
7482
agent_name=agent_name,
7583
params=ParamsSendMessageRequest(
@@ -88,6 +96,8 @@ def test_send_message(self, client: Agentex, agent_name: str, agent_id: str):
8896
content = message.content
8997
assert content is not None
9098
assert isinstance(content, TextContent) and isinstance(content.content, str)
99+
# Validate response contains the expected keyword for THIS message's topic
100+
validate_text_in_string(expected_keyword, content.content.lower())
91101

92102
states = client.states.list(agent_id=agent_id, task_id=task.id)
93103
assert len(states) == 1
@@ -105,19 +115,28 @@ class TestStreamingMessages:
105115
"""Test streaming message sending."""
106116

107117
def test_send_stream_message(self, client: Agentex, agent_name: str, agent_id: str):
108-
"""Test streaming messages in a multi-turn conversation."""
118+
"""
119+
Test message ordering with streaming by sending messages about distinct topics.
120+
121+
This validates that the agent receives messages in chronological order.
122+
If messages are reversed (newest first), the agent would respond about
123+
the wrong topic.
124+
"""
109125
# create a task for this specific conversation
110126
task_response = client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
111127
task = task_response.result
112128

113129
assert task is not None
114-
messages = [
115-
"Hello, can you tell me a little bit about tennis? I want you to make sure you use the word 'tennis' in each response.",
116-
"Pick one of the things you just mentioned, and dive deeper into it.",
117-
"Can you now output a summary of this conversation",
130+
131+
# Each message asks about a distinct topic with a required keyword in response
132+
# This validates message ordering: if order is wrong, agent responds about wrong topic
133+
messages_and_expected_keywords = [
134+
("Tell me about tennis. You must include the word 'tennis' in your response.", "tennis"),
135+
("Now tell me about basketball. You must include the word 'basketball' in your response. Do not mention tennis.", "basketball"),
136+
("Now tell me about soccer. You must include the word 'soccer' in your response. Do not mention tennis or basketball.", "soccer"),
118137
]
119138

120-
for i, msg in enumerate(messages):
139+
for i, (msg, expected_keyword) in enumerate(messages_and_expected_keywords):
121140
stream = client.agents.send_message_stream(
122141
agent_name=agent_name,
123142
params=ParamsSendMessageRequest(
@@ -137,6 +156,9 @@ def test_send_stream_message(self, client: Agentex, agent_name: str, agent_id: s
137156
# this is using the chat_completion_stream, so we will be getting chunks of data
138157
assert len(chunks) > 1, "No chunks received in streaming response."
139158

159+
# Validate response contains the expected keyword for THIS message's topic
160+
validate_text_in_string(expected_keyword, aggregated_content.lower())
161+
140162
states = client.states.list(agent_id=agent_id, task_id=task.id)
141163
assert len(states) == 1
142164

examples/tutorials/10_async/00_base/040_other_sdks/tests/test_agent.py

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -171,7 +171,13 @@ async def test_send_event_and_poll_with_tool_use(self, client: AsyncAgentex, age
171171

172172
@pytest.mark.asyncio
173173
async def test_multi_turn_conversation_with_state(self, client: AsyncAgentex, agent_id: str):
174-
"""Test multiple turns of conversation with state preservation."""
174+
"""
175+
Test message ordering by sending messages about distinct topics.
176+
177+
This validates that the agent receives messages in chronological order.
178+
If messages are reversed (newest first), the agent would respond about
179+
the wrong topic.
180+
"""
175181
# Create a task for this conversation
176182
task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
177183
task = task_response.result
@@ -180,8 +186,8 @@ async def test_multi_turn_conversation_with_state(self, client: AsyncAgentex, ag
180186
# ensure the task is created before we send the first event
181187
await asyncio.sleep(1)
182188

183-
# First turn
184-
user_message_1 = "My favorite color is blue."
189+
# First turn - ask about tennis
190+
user_message_1 = "Tell me about tennis. You must include the word 'tennis' in your response."
185191
first_turn_response_found = False
186192
async for message in send_event_and_poll_yielding(
187193
client=client,
@@ -198,6 +204,8 @@ async def test_multi_turn_conversation_with_state(self, client: AsyncAgentex, ag
198204
and message.content.author == "agent"
199205
and message.content.content
200206
):
207+
# Validate response is about tennis
208+
assert "tennis" in message.content.content.lower(), "First response should be about tennis"
201209
first_turn_response_found = True
202210
break
203211

@@ -219,8 +227,9 @@ async def test_multi_turn_conversation_with_state(self, client: AsyncAgentex, ag
219227

220228
await asyncio.sleep(1)
221229

222-
# Second turn - reference previous context
223-
user_message_2 = "What did I just tell you my favorite color was?"
230+
# Second turn - ask about basketball (different topic)
231+
# If message ordering is wrong, agent might respond about tennis instead
232+
user_message_2 = "Now tell me about basketball. You must include the word 'basketball' in your response. Do not mention tennis."
224233
second_turn_response_found = False
225234
async for message in send_event_and_poll_yielding(
226235
client=client,
@@ -237,7 +246,8 @@ async def test_multi_turn_conversation_with_state(self, client: AsyncAgentex, ag
237246
and message.content.content
238247
):
239248
response_text = message.content.content.lower()
240-
assert "blue" in response_text
249+
# Validate response is about basketball, not tennis
250+
assert "basketball" in response_text, f"Second response should be about basketball, got: {response_text}"
241251
second_turn_response_found = True
242252
break
243253

examples/tutorials/10_async/10_temporal/010_agent_chat/tests/test_agent.py

Lines changed: 17 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -143,7 +143,13 @@ async def test_send_event_and_poll_with_calculator(self, client: AsyncAgentex, a
143143

144144
@pytest.mark.asyncio
145145
async def test_multi_turn_conversation(self, client: AsyncAgentex, agent_id: str):
146-
"""Test multiple turns of conversation with state preservation."""
146+
"""
147+
Test message ordering by sending messages about distinct topics.
148+
149+
This validates that the agent receives messages in chronological order.
150+
If messages are reversed (newest first), the agent would respond about
151+
the wrong topic.
152+
"""
147153
# Create a task for this conversation
148154
task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
149155
task = task_response.result
@@ -152,8 +158,8 @@ async def test_multi_turn_conversation(self, client: AsyncAgentex, agent_id: str
152158
# Wait for workflow to initialize
153159
await asyncio.sleep(1)
154160

155-
# First turn
156-
user_message_1 = "My favorite color is blue."
161+
# First turn - ask about tennis
162+
user_message_1 = "Tell me about tennis. You must include the word 'tennis' in your response."
157163
first_turn_found = False
158164
async for message in send_event_and_poll_yielding(
159165
client=client,
@@ -170,6 +176,8 @@ async def test_multi_turn_conversation(self, client: AsyncAgentex, agent_id: str
170176
and message.content.author == "agent"
171177
and message.content.content
172178
):
179+
# Validate response is about tennis
180+
assert "tennis" in message.content.content.lower(), "First response should be about tennis"
173181
first_turn_found = True
174182
break
175183

@@ -178,9 +186,10 @@ async def test_multi_turn_conversation(self, client: AsyncAgentex, agent_id: str
178186
# Wait a bit for state to update
179187
await asyncio.sleep(2)
180188

181-
# Second turn - reference previous context
189+
# Second turn - ask about basketball (different topic)
190+
# If message ordering is wrong, agent might respond about tennis instead
182191
found_response = False
183-
user_message_2 = "What did I just tell you my favorite color was?"
192+
user_message_2 = "Now tell me about basketball. You must include the word 'basketball' in your response. Do not mention tennis."
184193
async for message in send_event_and_poll_yielding(
185194
client=client,
186195
agent_id=agent_id,
@@ -196,11 +205,12 @@ async def test_multi_turn_conversation(self, client: AsyncAgentex, agent_id: str
196205
and message.content.content
197206
):
198207
response_text = message.content.content.lower()
199-
assert "blue" in response_text, f"Expected 'blue' in response but got: {response_text}"
208+
# Validate response is about basketball, not tennis
209+
assert "basketball" in response_text, f"Second response should be about basketball, got: {response_text}"
200210
found_response = True
201211
break
202212

203-
assert found_response, "Did not receive final agent text response with context recall"
213+
assert found_response, "Did not receive final agent text response with correct topic"
204214

205215

206216
class TestStreamingEvents:

0 commit comments

Comments
 (0)