Source code for haive.core.schema.prebuilt.structured_output_state
"""State schema with structured output parsing capabilities using LangChain output parsers."""fromtypingimportAnyfromlangchain_core.messagesimportAIMessage,ToolMessagefromlangchain_core.output_parsersimport(BaseOutputParser,PydanticOutputParser,PydanticToolsParser,)frompydanticimportBaseModel,Field,field_validator,model_validatorfromhaive.core.schema.prebuilt.messages.messages_with_token_usageimport(MessagesStateWithTokenUsage,)
class StructuredOutputState(MessagesStateWithTokenUsage):
    """MessagesState with automatic structured output parsing and token tracking.

    This state schema extends MessagesStateWithTokenUsage and is intended to
    parse the content of AI messages into structured outputs using LangChain
    output parsers, inserting a ToolMessage after each AI message that parses
    successfully.

    Key features:
        - A "before" model validator picks an output parser from
          ``output_models`` when no explicit ``output_parser`` is given.
        - A field validator on ``messages`` parses AI message content and
          appends ToolMessage objects carrying the parsed result.
        - Parsing is best-effort: a message that cannot be parsed is kept
          unchanged.
        - Token tracking is inherited from MessagesStateWithTokenUsage.

    Parsing happens when ``messages`` is validated (for example at
    construction time). Mutating the list in place, e.g.
    ``state.messages.append(...)``, does not by itself re-run the validator.

    Examples:
        from typing import Any

        from langchain_core.messages import AIMessage
        from pydantic import BaseModel

        class SearchQuery(BaseModel):
            query: str
            filters: dict[str, Any]

        ai_msg = AIMessage(
            content='{"query": "python", "filters": {"language": "en"}}',
            response_metadata={"token_usage": {"total_tokens": 50}},
        )

        # Configure the state with an output model and the messages to parse
        state = StructuredOutputState(
            messages=[ai_msg],
            output_models=[SearchQuery],
            parse_as_tools=True,  # Convert to tool calls
        )
    """

    # Configuration for structured output parsing
    output_models: list[type[BaseModel]] | None = Field(
        default=None, description="Pydantic models to parse outputs into"
    )
    output_parser: BaseOutputParser | None = Field(
        default=None, description="Custom output parser to use"
    )
    parse_as_tools: bool = Field(
        default=True, description="Whether to parse Pydantic models as tool calls"
    )
    auto_parse_ai_messages: bool = Field(
        default=True,
        description="Automatically parse AI messages with structured output",
    )
    parsed_outputs: dict[str, Any] = Field(
        default_factory=dict, description="Storage for parsed structured outputs"
    )

    @model_validator(mode="before")
    @classmethod
    def setup_output_parser(cls, values: dict[str, Any]) -> dict[str, Any]:
        """Set up the appropriate output parser based on configuration.

        An explicitly supplied ``output_parser`` always wins. Otherwise a
        ``PydanticToolsParser`` is built when ``parse_as_tools`` is truthy
        (the default), or a ``PydanticOutputParser`` when exactly one output
        model is configured.

        Args:
            values: Raw input passed to the model before field validation.

        Returns:
            The input, with ``output_parser`` filled in when one was derived.
        """
        # A "before" validator receives the raw input, which is not guaranteed
        # to be a dict; leave anything else for pydantic to handle.
        if not isinstance(values, dict):
            return values

        output_models = values.get("output_models")
        if values.get("output_parser") or not output_models:
            return values

        parser: BaseOutputParser | None = None
        if values.get("parse_as_tools", True):
            # Use PydanticToolsParser for tool message conversion
            parser = PydanticToolsParser(tools=output_models)
        elif len(output_models) == 1:
            # Use PydanticOutputParser for single model
            parser = PydanticOutputParser(pydantic_object=output_models[0])

        if parser is None:
            return values

        # Return a copy so the caller's input dict is not mutated.
        return {**values, "output_parser": parser}

    @field_validator("messages", mode="after")
    @classmethod
    def parse_structured_outputs(cls, messages: list[Any], info) -> list[Any]:
        """Parse AI messages with structured output into appropriate format.

        Every original message is kept, in order. After each AIMessage whose
        string content parses successfully, one or more ToolMessage objects
        are inserted. Parse failures are logged at debug level and otherwise
        ignored.

        Args:
            messages: The validated message list.
            info: Pydantic validation info; ``info.data`` is read for the
                parser configuration.

        Returns:
            A new list with ToolMessages interleaved, or ``messages`` itself
            when parsing is disabled or no parser is configured.
        """
        # NOTE(review): ``info.data`` only contains fields validated before
        # ``messages``. ``messages`` is not declared in this class, so it is
        # presumably inherited and validated before the configuration fields
        # declared here -- in which case the lookups below find nothing and
        # this validator returns early. Confirm against the parent class.
        data = info.data
        if not data.get("auto_parse_ai_messages"):
            return messages

        output_parser = data.get("output_parser")
        if not output_parser:
            return messages

        parse_as_tools = data.get("parse_as_tools", True)
        parsed_messages: list[Any] = []

        for msg in messages:
            parsed_messages.append(msg)

            # Only parse AI messages that carry non-empty text content; the
            # parsers below operate on strings.
            if not (isinstance(msg, AIMessage) and msg.content):
                continue
            if not isinstance(msg.content, str):
                continue

            try:
                if isinstance(output_parser, PydanticToolsParser):
                    # NOTE(review): in langchain_core the tools parsers appear
                    # to implement ``parse_result`` (reading tool calls from a
                    # ChatGeneration) and leave ``parse(text)`` unimplemented;
                    # if so this branch always falls into the handler below.
                    # Verify before relying on it.
                    parsed_tools = output_parser.parse(msg.content)

                    # Create tool messages for each parsed tool
                    for tool_call in parsed_tools:
                        parsed_messages.append(
                            ToolMessage(
                                content=str(tool_call),
                                tool_call_id=f"call_{len(parsed_messages)}",
                                name=tool_call.__class__.__name__,
                            )
                        )
                elif isinstance(output_parser, PydanticOutputParser):
                    # Parse as structured object
                    parsed_obj = output_parser.parse(msg.content)

                    # Index of the AI message within the *output* list.
                    msg_idx = len(parsed_messages) - 1

                    # NOTE(review): writing into ``info.data`` is unlikely to
                    # reach the final ``parsed_outputs`` field, which is
                    # validated separately -- confirm.
                    data.setdefault("parsed_outputs", {})[f"msg_{msg_idx}"] = parsed_obj

                    if parse_as_tools:
                        # Prefer the pydantic v2 serializer; fall back to the
                        # v1-style ``json()`` for models that lack it.
                        dump_json = (
                            getattr(parsed_obj, "model_dump_json", None)
                            or parsed_obj.json
                        )
                        parsed_messages.append(
                            ToolMessage(
                                content=dump_json(),
                                tool_call_id=f"call_{msg_idx}",
                                name=parsed_obj.__class__.__name__,
                            )
                        )
            except Exception:
                # Best-effort: keep the original message, but leave a trace so
                # that parser misconfiguration is not completely invisible.
                logging.getLogger(__name__).debug(
                    "Structured output parsing failed; keeping original message",
                    exc_info=True,
                )

        return parsed_messages

    def get_parsed_output(self, message_index: int) -> BaseModel | None:
        """Get parsed output for a specific message index.

        Args:
            message_index: Index used when the output was stored
                (key ``msg_<index>``).

        Returns:
            The stored object, or None when nothing is stored for that index.
        """
        return self.parsed_outputs.get(f"msg_{message_index}")

    def get_latest_parsed_output(self) -> BaseModel | None:
        """Get the most recent parsed output.

        Returns:
            The value stored under the ``msg_<n>`` key with the highest ``n``,
            or None when there is no such key. Keys that do not follow the
            ``msg_<n>`` pattern are ignored.
        """
        indexed_keys: list[tuple[int, str]] = []
        for key in self.parsed_outputs:
            prefix, _, suffix = key.partition("_")
            if prefix == "msg" and suffix.isdecimal():
                indexed_keys.append((int(suffix), key))

        if not indexed_keys:
            return None

        # Highest message index wins.
        _, latest_key = max(indexed_keys)
        return self.parsed_outputs[latest_key]

    def get_tool_calls(self) -> list[ToolMessage]:
        """Get all ToolMessage instances from the conversation, in order."""
        return [msg for msg in self.messages if isinstance(msg, ToolMessage)]

    def format_for_structured_output(self) -> str:
        """Get format instructions for the configured output models.

        Returns:
            The parser's format instructions, or an empty string when no
            parser is configured or it does not provide
            ``get_format_instructions``.
        """
        if self.output_parser and hasattr(
            self.output_parser, "get_format_instructions"
        ):
            return self.output_parser.get_format_instructions()
        return ""
class StructuredOutputMixin:
    """Mixin to add structured output capabilities to any state schema.

    This mixin can be used to add structured output parsing to custom state
    schemas without inheriting from StructuredOutputState.
    """

    @field_validator("messages", mode="after")
    @classmethod
    def parse_structured_outputs_mixin(cls, messages: list[Any], info) -> list[Any]:
        """Parse structured outputs in messages (mixin version).

        Currently a placeholder that passes the messages through unchanged.

        Args:
            messages: The validated message list.
            info: Pydantic validation info (unused for now).

        Returns:
            The message list, unmodified.
        """
        # Intended to carry the same logic as
        # StructuredOutputState.parse_structured_outputs, packaged as a mixin
        # so it can be added to any state schema.
        # TODO: implement the parsing logic.
        #
        # An "after" field validator's return value replaces the field value,
        # so the messages must be returned explicitly; falling off the end
        # would replace them with None.
        return messages