Skip to content

Commit 2d79e41

Browse files
authored
adding tracing support for bing custom search tool (#44291)
* adding tracing support for bing custom search tool
* fixes based on review comments
1 parent cbb1db6 commit 2d79e41

File tree

1 file changed

+198
-13
lines changed

1 file changed

+198
-13
lines changed

sdk/ai/azure-ai-projects/azure/ai/projects/telemetry/_responses_instrumentor.py

Lines changed: 198 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -987,6 +987,26 @@ def _emit_tool_call_event(
987987
attributes[GEN_AI_EVENT_CONTENT] = json.dumps(content_array, ensure_ascii=False)
988988
span.span_instance.add_event(name=GEN_AI_ASSISTANT_MESSAGE_EVENT, attributes=attributes)
989989

990+
def _emit_tool_output_event(
    self,
    span: "AbstractSpan",
    tool_output: Dict[str, Any],
    conversation_id: Optional[str] = None,
) -> None:
    """Record one tool output on ``span`` as a message event.

    The output is wrapped as a single ``tool_call_output`` part inside a
    message whose role is ``"tool"``. The resulting one-element message
    list is serialized to JSON and stored under ``GEN_AI_EVENT_CONTENT``.

    :param span: Span wrapper; the event is added to ``span.span_instance``.
    :param tool_output: Dictionary describing the tool output; it must be
        JSON-serializable because it is passed through ``json.dumps``.
    :param conversation_id: Optional conversation identifier forwarded to
        ``_create_event_attributes``.
    """
    # A tool output is something fed TO the model (the result of running a
    # tool), hence the "tool" role on both the message and the attributes.
    message = {
        "role": "tool",
        "parts": [{"type": "tool_call_output", "content": tool_output}],
    }
    event_attributes = self._create_event_attributes(
        conversation_id=conversation_id,
        message_role="tool",
    )
    # The content is the bare JSON array of messages, with no outer wrapper.
    event_attributes[GEN_AI_EVENT_CONTENT] = json.dumps([message], ensure_ascii=False)
    # Emitted under the input-message event name because the tool output is
    # model input rather than model output.
    span.span_instance.add_event(name=GEN_AI_USER_MESSAGE_EVENT, attributes=event_attributes)
1009+
9901010
def _add_tool_call_events( # pylint: disable=too-many-branches
9911011
self,
9921012
span: "AbstractSpan",
@@ -1002,9 +1022,11 @@ def _add_tool_call_events( # pylint: disable=too-many-branches
10021022
if not output:
10031023
return
10041024

1025+
# Process output items for tool call events
10051026
for output_item in output:
10061027
try:
10071028
item_type = getattr(output_item, "type", None)
1029+
# Process item based on type
10081030
if not item_type:
10091031
continue
10101032

@@ -1331,7 +1353,9 @@ def _add_tool_call_events( # pylint: disable=too-many-branches
13311353
self._emit_tool_call_event(span, tool_call, conversation_id)
13321354

13331355
# Handle unknown/future tool call types with best effort
1334-
elif item_type and "_call" in item_type:
1356+
# Exclude _output types - those are handled separately as tool outputs, not tool calls
1357+
elif item_type and "_call" in item_type and not item_type.endswith("_output"):
1358+
# Generic handler for tool calls
13351359
try:
13361360
tool_call = {
13371361
"type": item_type,
@@ -1345,8 +1369,18 @@ def _add_tool_call_events( # pylint: disable=too-many-branches
13451369

13461370
# Only include detailed fields if content recording is enabled
13471371
if _trace_responses_content:
1348-
# Try to get the full tool details using as_dict() if available
1349-
if hasattr(output_item, "as_dict"):
1372+
# Try to get the full tool details using model_dump() for Pydantic models
1373+
if hasattr(output_item, "model_dump"):
1374+
tool_dict = output_item.model_dump()
1375+
# Extract the tool-specific details (exclude common fields already captured)
1376+
for key, value in tool_dict.items():
1377+
if (
1378+
key
1379+
not in ["type", "id", "call_id", "role", "content", "status", "partition_key"]
1380+
and value is not None
1381+
):
1382+
tool_call[key] = value
1383+
elif hasattr(output_item, "as_dict"):
13501384
tool_dict = output_item.as_dict()
13511385
# Extract the tool-specific details (exclude common fields already captured)
13521386
for key, value in tool_dict.items():
@@ -1373,6 +1407,64 @@ def _add_tool_call_events( # pylint: disable=too-many-branches
13731407
# Log but don't crash if we can't handle an unknown tool type
13741408
logger.debug(f"Failed to process unknown tool call type '{item_type}': {e}")
13751409

1410+
# Handle unknown/future tool output types with best effort
1411+
# These are the _output types that correspond to the tool calls above
1412+
elif item_type and item_type.endswith("_output"):
1413+
# Generic handler for tool outputs
1414+
try:
1415+
tool_output = {
1416+
"type": item_type,
1417+
}
1418+
1419+
# Always try to include common ID fields (safe, needed for correlation)
1420+
for id_field in ["id", "call_id"]:
1421+
if hasattr(output_item, id_field):
1422+
tool_output["id"] = getattr(output_item, id_field)
1423+
break # Use first available ID field
1424+
1425+
# Only include detailed fields if content recording is enabled
1426+
if _trace_responses_content:
1427+
# Try to get the full tool output using model_dump() for Pydantic models
1428+
if hasattr(output_item, "model_dump"):
1429+
output_dict = output_item.model_dump()
1430+
# Extract the tool-specific output (exclude common fields already captured)
1431+
# Include fields even if empty string (but not None) for API consistency
1432+
for key, value in output_dict.items():
1433+
if (
1434+
key
1435+
not in ["type", "id", "call_id", "role", "content", "status", "partition_key"]
1436+
and value is not None
1437+
):
1438+
tool_output[key] = value
1439+
elif hasattr(output_item, "as_dict"):
1440+
output_dict = output_item.as_dict()
1441+
# Extract the tool-specific output (exclude common fields already captured)
1442+
for key, value in output_dict.items():
1443+
if (
1444+
key not in ["type", "id", "call_id", "role", "content", "status"]
1445+
and value is not None
1446+
):
1447+
tool_output[key] = value
1448+
else:
1449+
# Fallback: try to capture common output fields manually
1450+
for field in [
1451+
"output",
1452+
"result",
1453+
"results",
1454+
"data",
1455+
"response",
1456+
]:
1457+
if hasattr(output_item, field):
1458+
value = getattr(output_item, field)
1459+
if value is not None:
1460+
tool_output[field] = value
1461+
1462+
self._emit_tool_output_event(span, tool_output, conversation_id)
1463+
1464+
except Exception as e:
1465+
# Log but don't crash if we can't handle an unknown tool output type
1466+
logger.debug(f"Failed to process unknown tool output type '{item_type}': {e}")
1467+
13761468
except Exception as e:
13771469
# Catch-all to prevent any tool call processing errors from breaking instrumentation
13781470
logger.debug(f"Error processing tool call events: {e}")
@@ -2241,7 +2333,8 @@ def __init__(
22412333
self.finish_reason = None # Track finish_reason from streaming chunks
22422334

22432335
# Track all output items from streaming events (tool calls, workflow actions, etc.)
2244-
self.output_items = {} # Dict[item_id, output_item] - keyed by call_id, action_id, or id
2336+
# Use (id, type) as key to avoid overwriting when call and output have same ID
2337+
self.output_items = {} # Dict[(item_id, item_type), output_item]
22452338
self.has_output_items = False
22462339

22472340
# Expose response attribute for compatibility with ResponseStreamManager
@@ -2302,8 +2395,11 @@ def process_chunk(self, chunk):
23022395
or getattr(item, "id", None)
23032396
)
23042397
if item_id:
2305-
self.output_items[item_id] = item
2398+
# Use (id, type) tuple as key to distinguish call from output
2399+
key = (item_id, item_type)
2400+
self.output_items[key] = item
23062401
self.has_output_items = True
2402+
# Items without ID or type are skipped
23072403

23082404
# Capture response ID from ResponseCreatedEvent or ResponseCompletedEvent
23092405
if chunk_type == "response.created" and hasattr(chunk, "response"):
@@ -2483,6 +2579,7 @@ def __init__(
24832579
self.span_ended = True
24842580

24852581
def __iter__(self):
2582+
# Start streaming iteration
24862583
return self
24872584

24882585
def __next__(self):
@@ -2713,7 +2810,8 @@ def __init__(
27132810
self.finish_reason = None # Track finish_reason from streaming chunks
27142811

27152812
# Track all output items from streaming events (tool calls, workflow actions, etc.)
2716-
self.output_items = {} # Dict[item_id, output_item] - keyed by call_id, action_id, or id
2813+
# Use (id, type) as key to avoid overwriting when call and output have same ID
2814+
self.output_items = {} # Dict[(item_id, item_type), output_item]
27172815
self.has_output_items = False
27182816

27192817
# Expose response attribute for compatibility with AsyncResponseStreamManager
@@ -2776,8 +2874,10 @@ def process_chunk(self, chunk):
27762874
or getattr(item, "id", None)
27772875
)
27782876
if item_id:
2779-
self.output_items[item_id] = item
2877+
# Use (id, type) tuple as key to distinguish call from output
2878+
self.output_items[(item_id, item_type)] = item
27802879
self.has_output_items = True
2880+
# Items without ID or type are skipped
27812881

27822882
# Capture response ID from ResponseCreatedEvent or ResponseCompletedEvent
27832883
if chunk_type == "response.created" and hasattr(chunk, "response"):
@@ -3692,8 +3792,8 @@ def _add_conversation_item_event( # pylint: disable=too-many-branches,too-many-
36923792

36933793
event_name = GEN_AI_CONVERSATION_ITEM_EVENT
36943794

3695-
elif item_type == "remote_function_call_output":
3696-
# Remote function call output (like Azure AI Search)
3795+
elif item_type == "remote_function_call":
3796+
# Remote function call (like Bing Custom Search call)
36973797
role = "assistant" # Override role for remote function calls
36983798

36993799
# Extract the tool name
@@ -3718,8 +3818,12 @@ def _add_conversation_item_event( # pylint: disable=too-many-branches,too-many-
37183818
# Extract data from model_extra if available (Pydantic v2 style)
37193819
if hasattr(item, "model_extra") and isinstance(item.model_extra, dict):
37203820
for key, value in item.model_extra.items():
3721-
# Skip already captured fields, redundant fields (name, label), and empty/None values
3722-
if key not in ["type", "id", "call_id", "name", "label"] and value is not None and value != "":
3821+
# Skip already captured fields, redundant fields (name, label), internal fields (partition_key), and empty/None values
3822+
if (
3823+
key not in ["type", "id", "call_id", "name", "label", "partition_key"]
3824+
and value is not None
3825+
and value != ""
3826+
):
37233827
tool_call[key] = value
37243828

37253829
# Also try as_dict if available
@@ -3748,8 +3852,7 @@ def _add_conversation_item_event( # pylint: disable=too-many-branches,too-many-
37483852
# Fallback: try common fields directly (skip if empty and skip redundant name/label)
37493853
for field in [
37503854
"input",
3751-
"output",
3752-
"results",
3855+
"arguments",
37533856
"status",
37543857
"error",
37553858
"search_query",
@@ -3770,6 +3873,88 @@ def _add_conversation_item_event( # pylint: disable=too-many-branches,too-many-
37703873

37713874
event_name = GEN_AI_CONVERSATION_ITEM_EVENT
37723875

3876+
elif item_type == "remote_function_call_output":
3877+
# Remote function call output (like Bing Custom Search output)
3878+
role = "tool" # Tool outputs use role "tool"
3879+
3880+
# Extract the tool name
3881+
tool_name = getattr(item, "name", None) if hasattr(item, "name") else None
3882+
3883+
tool_output = {
3884+
"type": tool_name if tool_name else "remote_function",
3885+
}
3886+
3887+
# Always include ID (needed for correlation)
3888+
if hasattr(item, "id"):
3889+
tool_output["id"] = item.id
3890+
elif hasattr(item, "call_id"):
3891+
tool_output["id"] = item.call_id
3892+
# Check model_extra for call_id
3893+
elif hasattr(item, "model_extra") and isinstance(item.model_extra, dict):
3894+
if "call_id" in item.model_extra:
3895+
tool_output["id"] = item.model_extra["call_id"]
3896+
3897+
# Only include tool details if content recording is enabled
3898+
if _trace_responses_content:
3899+
# Extract data from model_extra if available (Pydantic v2 style)
3900+
if hasattr(item, "model_extra") and isinstance(item.model_extra, dict):
3901+
for key, value in item.model_extra.items():
3902+
# Skip already captured fields, redundant fields (name, label), internal fields (partition_key), and empty/None values
3903+
if (
3904+
key not in ["type", "id", "call_id", "name", "label", "partition_key"]
3905+
and value is not None
3906+
and value != ""
3907+
):
3908+
tool_output[key] = value
3909+
3910+
# Also try as_dict if available
3911+
if hasattr(item, "as_dict"):
3912+
try:
3913+
tool_dict = item.as_dict()
3914+
# Extract relevant fields (exclude already captured ones and empty/None values)
3915+
for key, value in tool_dict.items():
3916+
if key not in [
3917+
"type",
3918+
"id",
3919+
"call_id",
3920+
"name",
3921+
"label",
3922+
"role",
3923+
"content",
3924+
]:
3925+
# Skip empty strings and None values
3926+
if value is not None and value != "":
3927+
# Don't overwrite if already exists
3928+
if key not in tool_output:
3929+
tool_output[key] = value
3930+
except Exception as e:
3931+
logger.debug(f"Failed to extract data from as_dict: {e}")
3932+
3933+
# Fallback: try common fields directly (skip if empty and skip redundant name/label)
3934+
for field in [
3935+
"input",
3936+
"output",
3937+
"results",
3938+
"status",
3939+
"error",
3940+
"search_query",
3941+
"query",
3942+
]:
3943+
if hasattr(item, field):
3944+
try:
3945+
value = getattr(item, field)
3946+
if value is not None and value != "":
3947+
# If not already in tool_output, add it
3948+
if field not in tool_output:
3949+
tool_output[field] = value
3950+
except Exception:
3951+
pass
3952+
3953+
# Tool outputs use tool_call_output type in parts
3954+
event_body = [{"role": role, "parts": [{"type": "tool_call_output", "content": tool_output}]}]
3955+
3956+
event_name = GEN_AI_CONVERSATION_ITEM_EVENT
3957+
37733958
elif item_type == "workflow_action":
37743959
# Workflow action item - include workflow execution details
37753960
role = "workflow"

0 commit comments

Comments
 (0)