@@ -987,6 +987,26 @@ def _emit_tool_call_event(
987987 attributes [GEN_AI_EVENT_CONTENT ] = json .dumps (content_array , ensure_ascii = False )
988988 span .span_instance .add_event (name = GEN_AI_ASSISTANT_MESSAGE_EVENT , attributes = attributes )
989989
def _emit_tool_output_event(
    self,
    span: "AbstractSpan",
    tool_output: Dict[str, Any],
    conversation_id: Optional[str] = None,
) -> None:
    """Record a single tool output on ``span`` as a message event.

    The payload is wrapped as one ``tool_call_output`` part of a message
    with role ``"tool"``, serialized to a JSON array (no outer wrapper) and
    stored under ``GEN_AI_EVENT_CONTENT``. The event is emitted under
    ``GEN_AI_USER_MESSAGE_EVENT`` because a tool output is fed back to the
    model as input rather than produced by it.

    :param span: Span whose ``span_instance`` receives the event.
    :param tool_output: Tool output payload embedded as the part content.
    :param conversation_id: Optional conversation identifier forwarded to
        ``_create_event_attributes``.
    """
    tool_role = "tool"
    event_attributes = self._create_event_attributes(
        conversation_id=conversation_id,
        message_role=tool_role,
    )
    # One message carrying one part; key order is kept so the serialized JSON is unchanged.
    message = {
        "role": tool_role,
        "parts": [{"type": "tool_call_output", "content": tool_output}],
    }
    event_attributes[GEN_AI_EVENT_CONTENT] = json.dumps([message], ensure_ascii=False)
    span.span_instance.add_event(name=GEN_AI_USER_MESSAGE_EVENT, attributes=event_attributes)
1009+
9901010 def _add_tool_call_events ( # pylint: disable=too-many-branches
9911011 self ,
9921012 span : "AbstractSpan" ,
@@ -1002,9 +1022,11 @@ def _add_tool_call_events( # pylint: disable=too-many-branches
10021022 if not output :
10031023 return
10041024
1025+ # Process output items for tool call events
10051026 for output_item in output :
10061027 try :
10071028 item_type = getattr (output_item , "type" , None )
1029+ # Process item based on type
10081030 if not item_type :
10091031 continue
10101032
@@ -1331,7 +1353,9 @@ def _add_tool_call_events( # pylint: disable=too-many-branches
13311353 self ._emit_tool_call_event (span , tool_call , conversation_id )
13321354
13331355 # Handle unknown/future tool call types with best effort
1334- elif item_type and "_call" in item_type :
1356+ # Exclude _output types - those are handled separately as tool outputs, not tool calls
1357+ elif item_type and "_call" in item_type and not item_type .endswith ("_output" ):
1358+ # Generic handler for tool calls
13351359 try :
13361360 tool_call = {
13371361 "type" : item_type ,
@@ -1345,8 +1369,18 @@ def _add_tool_call_events( # pylint: disable=too-many-branches
13451369
13461370 # Only include detailed fields if content recording is enabled
13471371 if _trace_responses_content :
1348- # Try to get the full tool details using as_dict() if available
1349- if hasattr (output_item , "as_dict" ):
1372+ # Try to get the full tool details using model_dump() for Pydantic models
1373+ if hasattr (output_item , "model_dump" ):
1374+ tool_dict = output_item .model_dump ()
1375+ # Extract the tool-specific details (exclude common fields already captured)
1376+ for key , value in tool_dict .items ():
1377+ if (
1378+ key
1379+ not in ["type" , "id" , "call_id" , "role" , "content" , "status" , "partition_key" ]
1380+ and value is not None
1381+ ):
1382+ tool_call [key ] = value
1383+ elif hasattr (output_item , "as_dict" ):
13501384 tool_dict = output_item .as_dict ()
13511385 # Extract the tool-specific details (exclude common fields already captured)
13521386 for key , value in tool_dict .items ():
@@ -1373,6 +1407,64 @@ def _add_tool_call_events( # pylint: disable=too-many-branches
13731407 # Log but don't crash if we can't handle an unknown tool type
13741408 logger .debug (f"Failed to process unknown tool call type '{ item_type } ': { e } " )
13751409
1410+ # Handle unknown/future tool output types with best effort
1411+ # These are the _output types that correspond to the tool calls above
1412+ elif item_type and item_type .endswith ("_output" ):
1413+ # Generic handler for tool outputs
1414+ try :
1415+ tool_output = {
1416+ "type" : item_type ,
1417+ }
1418+
1419+ # Always try to include common ID fields (safe, needed for correlation)
1420+ for id_field in ["id" , "call_id" ]:
1421+ if hasattr (output_item , id_field ):
1422+ tool_output ["id" ] = getattr (output_item , id_field )
1423+ break # Use first available ID field
1424+
1425+ # Only include detailed fields if content recording is enabled
1426+ if _trace_responses_content :
1427+ # Try to get the full tool output using model_dump() for Pydantic models
1428+ if hasattr (output_item , "model_dump" ):
1429+ output_dict = output_item .model_dump ()
1430+ # Extract the tool-specific output (exclude common fields already captured)
1431+ # Include fields even if empty string (but not None) for API consistency
1432+ for key , value in output_dict .items ():
1433+ if (
1434+ key
1435+ not in ["type" , "id" , "call_id" , "role" , "content" , "status" , "partition_key" ]
1436+ and value is not None
1437+ ):
1438+ tool_output [key ] = value
1439+ elif hasattr (output_item , "as_dict" ):
1440+ output_dict = output_item .as_dict ()
1441+ # Extract the tool-specific output (exclude common fields already captured)
1442+ for key , value in output_dict .items ():
1443+ if (
1444+ key not in ["type" , "id" , "call_id" , "role" , "content" , "status" ]
1445+ and value is not None
1446+ ):
1447+ tool_output [key ] = value
1448+ else :
1449+ # Fallback: try to capture common output fields manually
1450+ for field in [
1451+ "output" ,
1452+ "result" ,
1453+ "results" ,
1454+ "data" ,
1455+ "response" ,
1456+ ]:
1457+ if hasattr (output_item , field ):
1458+ value = getattr (output_item , field )
1459+ if value is not None :
1460+ tool_output [field ] = value
1461+
1462+ self ._emit_tool_output_event (span , tool_output , conversation_id )
1463+
1464+ except Exception as e :
1465+ # Log but don't crash if we can't handle an unknown tool output type
1466+ logger .debug (f"Failed to process unknown tool output type '{ item_type } ': { e } " )
1467+
13761468 except Exception as e :
13771469 # Catch-all to prevent any tool call processing errors from breaking instrumentation
13781470 logger .debug (f"Error processing tool call events: { e } " )
@@ -2241,7 +2333,8 @@ def __init__(
22412333 self .finish_reason = None # Track finish_reason from streaming chunks
22422334
22432335 # Track all output items from streaming events (tool calls, workflow actions, etc.)
2244- self .output_items = {} # Dict[item_id, output_item] - keyed by call_id, action_id, or id
2336+ # Use (id, type) as key to avoid overwriting when call and output have same ID
2337+ self .output_items = {} # Dict[(item_id, item_type), output_item]
22452338 self .has_output_items = False
22462339
22472340 # Expose response attribute for compatibility with ResponseStreamManager
@@ -2302,8 +2395,11 @@ def process_chunk(self, chunk):
23022395 or getattr (item , "id" , None )
23032396 )
23042397 if item_id :
2305- self .output_items [item_id ] = item
2398+ # Use (id, type) tuple as key to distinguish call from output
2399+ key = (item_id , item_type )
2400+ self .output_items [key ] = item
23062401 self .has_output_items = True
2402+ # Items without ID or type are skipped
23072403
23082404 # Capture response ID from ResponseCreatedEvent or ResponseCompletedEvent
23092405 if chunk_type == "response.created" and hasattr (chunk , "response" ):
@@ -2483,6 +2579,7 @@ def __init__(
24832579 self .span_ended = True
24842580
def __iter__(self):
    """Return this object itself as the iterator."""
    # Iterator protocol: no work happens here; the class also defines __next__.
    return self
24872584
24882585 def __next__ (self ):
@@ -2713,7 +2810,8 @@ def __init__(
27132810 self .finish_reason = None # Track finish_reason from streaming chunks
27142811
27152812 # Track all output items from streaming events (tool calls, workflow actions, etc.)
2716- self .output_items = {} # Dict[item_id, output_item] - keyed by call_id, action_id, or id
2813+ # Use (id, type) as key to avoid overwriting when call and output have same ID
2814+ self .output_items = {} # Dict[(item_id, item_type), output_item]
27172815 self .has_output_items = False
27182816
27192817 # Expose response attribute for compatibility with AsyncResponseStreamManager
@@ -2776,8 +2874,10 @@ def process_chunk(self, chunk):
27762874 or getattr (item , "id" , None )
27772875 )
27782876 if item_id :
2779- self .output_items [item_id ] = item
2877+ # Use (id, type) tuple as key to distinguish call from output
2878+ self .output_items [(item_id , item_type )] = item
27802879 self .has_output_items = True
2880+ # Items without ID or type are skipped
27812881
27822882 # Capture response ID from ResponseCreatedEvent or ResponseCompletedEvent
27832883 if chunk_type == "response.created" and hasattr (chunk , "response" ):
@@ -3692,8 +3792,8 @@ def _add_conversation_item_event( # pylint: disable=too-many-branches,too-many-
36923792
36933793 event_name = GEN_AI_CONVERSATION_ITEM_EVENT
36943794
3695- elif item_type == "remote_function_call_output " :
3696- # Remote function call output (like Azure AI Search)
3795+ elif item_type == "remote_function_call " :
3796+ # Remote function call (like Bing Custom Search call )
36973797 role = "assistant" # Override role for remote function calls
36983798
36993799 # Extract the tool name
@@ -3718,8 +3818,12 @@ def _add_conversation_item_event( # pylint: disable=too-many-branches,too-many-
37183818 # Extract data from model_extra if available (Pydantic v2 style)
37193819 if hasattr (item , "model_extra" ) and isinstance (item .model_extra , dict ):
37203820 for key , value in item .model_extra .items ():
3721- # Skip already captured fields, redundant fields (name, label), and empty/None values
3722- if key not in ["type" , "id" , "call_id" , "name" , "label" ] and value is not None and value != "" :
3821+ # Skip already captured fields, redundant fields (name, label), internal fields (partition_key), and empty/None values
3822+ if (
3823+ key not in ["type" , "id" , "call_id" , "name" , "label" , "partition_key" ]
3824+ and value is not None
3825+ and value != ""
3826+ ):
37233827 tool_call [key ] = value
37243828
37253829 # Also try as_dict if available
@@ -3748,8 +3852,7 @@ def _add_conversation_item_event( # pylint: disable=too-many-branches,too-many-
37483852 # Fallback: try common fields directly (skip if empty and skip redundant name/label)
37493853 for field in [
37503854 "input" ,
3751- "output" ,
3752- "results" ,
3855+ "arguments" ,
37533856 "status" ,
37543857 "error" ,
37553858 "search_query" ,
@@ -3770,6 +3873,88 @@ def _add_conversation_item_event( # pylint: disable=too-many-branches,too-many-
37703873
37713874 event_name = GEN_AI_CONVERSATION_ITEM_EVENT
37723875
3876+ elif item_type == "remote_function_call_output" :
3877+ # Remote function call output (like Bing Custom Search output)
3878+ role = "tool" # Tool outputs use role "tool"
3879+
3880+ # Extract the tool name
3881+ tool_name = getattr (item , "name" , None ) if hasattr (item , "name" ) else None
3882+
3883+ tool_output = {
3884+ "type" : tool_name if tool_name else "remote_function" ,
3885+ }
3886+
3887+ # Always include ID (needed for correlation)
3888+ if hasattr (item , "id" ):
3889+ tool_output ["id" ] = item .id
3890+ elif hasattr (item , "call_id" ):
3891+ tool_output ["id" ] = item .call_id
3892+ # Check model_extra for call_id
3893+ elif hasattr (item , "model_extra" ) and isinstance (item .model_extra , dict ):
3894+ if "call_id" in item .model_extra :
3895+ tool_output ["id" ] = item .model_extra ["call_id" ]
3896+
3897+ # Only include tool details if content recording is enabled
3898+ if _trace_responses_content :
3899+ # Extract data from model_extra if available (Pydantic v2 style)
3900+ if hasattr (item , "model_extra" ) and isinstance (item .model_extra , dict ):
3901+ for key , value in item .model_extra .items ():
3902+ # Skip already captured fields, redundant fields (name, label), internal fields (partition_key), and empty/None values
3903+ if (
3904+ key not in ["type" , "id" , "call_id" , "name" , "label" , "partition_key" ]
3905+ and value is not None
3906+ and value != ""
3907+ ):
3908+ tool_output [key ] = value
3909+
3910+ # Also try as_dict if available
3911+ if hasattr (item , "as_dict" ):
3912+ try :
3913+ tool_dict = item .as_dict ()
3914+ # Extract relevant fields (exclude already captured ones and empty/None values)
3915+ for key , value in tool_dict .items ():
3916+ if key not in [
3917+ "type" ,
3918+ "id" ,
3919+ "call_id" ,
3920+ "name" ,
3921+ "label" ,
3922+ "role" ,
3923+ "content" ,
3924+ ]:
3925+ # Skip empty strings and None values
3926+ if value is not None and value != "" :
3927+ # Don't overwrite if already exists
3928+ if key not in tool_output :
3929+ tool_output [key ] = value
3930+ except Exception as e :
3931+ logger .debug (f"Failed to extract data from as_dict: { e } " )
3932+
3933+ # Fallback: try common fields directly (skip if empty and skip redundant name/label)
3934+ for field in [
3935+ "input" ,
3936+ "output" ,
3937+ "results" ,
3938+ "status" ,
3939+ "error" ,
3940+ "search_query" ,
3941+ "query" ,
3942+ ]:
3943+ if hasattr (item , field ):
3944+ try :
3945+ value = getattr (item , field )
3946+ if value is not None and value != "" :
3947+ # If not already in tool_output, add it
3948+ if field not in tool_output :
3949+ tool_output [field ] = value
3950+ except Exception :
3951+ pass
3952+
3953+ # Tool outputs use tool_call_output type in parts
3954+ event_body = [{"role" : role , "parts" : [{"type" : "tool_call_output" , "content" : tool_output }]}]
3955+
3956+ event_name = GEN_AI_CONVERSATION_ITEM_EVENT
3957+
37733958 elif item_type == "workflow_action" :
37743959 # Workflow action item - include workflow execution details
37753960 role = "workflow"
0 commit comments