feat(api): Handle tool calls in responses API

Signed-off-by: Richard Palethorpe <io@richiejp.com>
This commit is contained in:
Richard Palethorpe
2025-06-12 05:11:43 +01:00
parent 863c6f3dcb
commit 9e40eea438
3 changed files with 428 additions and 69 deletions

View File

@@ -20,6 +20,10 @@ type Job struct {
UUID string UUID string
Metadata map[string]interface{} Metadata map[string]interface{}
DoneFilter bool DoneFilter bool
// Tools available for this job
BuiltinTools []openai.Tool // Built-in tools like web search
UserTools []openai.Tool // User-defined function tools
pastActions []*ActionRequest pastActions []*ActionRequest
nextAction *Action nextAction *Action
@@ -45,6 +49,18 @@ func WithConversationHistory(history []openai.ChatCompletionMessage) JobOption {
} }
} }
// WithBuiltinTools returns a JobOption that assigns tools to the job's
// BuiltinTools field. The slice is stored as passed in; it is not copied.
func WithBuiltinTools(tools []openai.Tool) JobOption {
	return func(job *Job) {
		job.BuiltinTools = tools
	}
}
// WithUserTools returns a JobOption that assigns tools to the job's
// UserTools field. The slice is stored as passed in; it is not copied.
func WithUserTools(tools []openai.Tool) JobOption {
	return func(job *Job) {
		job.UserTools = tools
	}
}
func WithReasoningCallback(f func(ActionCurrentState) bool) JobOption { func WithReasoningCallback(f func(ActionCurrentState) bool) JobOption {
return func(r *Job) { return func(r *Job) {
r.ReasoningCallback = f r.ReasoningCallback = f
@@ -227,3 +243,21 @@ func (j *Job) IncrementEvaluationLoop() {
currentLoop := j.GetEvaluationLoop() currentLoop := j.GetEvaluationLoop()
j.Metadata["evaluation_loop"] = currentLoop + 1 j.Metadata["evaluation_loop"] = currentLoop + 1
} }
// GetBuiltinTools returns the job's BuiltinTools field as stored (no copy is
// made), so the result is nil when no built-in tools were set.
func (j *Job) GetBuiltinTools() []openai.Tool {
	return j.BuiltinTools
}
// GetUserTools returns the job's UserTools field as stored (no copy is made),
// so the result is nil when no user tools were set.
func (j *Job) GetUserTools() []openai.Tool {
	return j.UserTools
}
// GetAllTools returns a freshly allocated slice holding the job's built-in
// tools followed by its user tools. The result is always non-nil, and it can
// be appended to without affecting the job's own slices.
func (j *Job) GetAllTools() []openai.Tool {
	builtin, user := j.BuiltinTools, j.UserTools

	combined := make([]openai.Tool, len(builtin)+len(user))
	n := copy(combined, builtin)
	copy(combined[n:], user)
	return combined
}

View File

@@ -513,9 +513,25 @@ func (a *App) Responses(pool *state.AgentPool, tracker *conversations.Conversati
return c.Status(http.StatusInternalServerError).JSON(types.ResponseBody{Error: "Agent not found"}) return c.Status(http.StatusInternalServerError).JSON(types.ResponseBody{Error: "Agent not found"})
} }
res := a.Ask( // Prepare job options
jobOptions := []coreTypes.JobOption{
coreTypes.WithConversationHistory(messages), coreTypes.WithConversationHistory(messages),
) }
// Add tools if present in the request
if len(request.Tools) > 0 {
builtinTools, userTools := types.SeparateTools(request.Tools)
if len(builtinTools) > 0 {
jobOptions = append(jobOptions, coreTypes.WithBuiltinTools(builtinTools))
xlog.Debug("Adding builtin tools to job", "count", len(builtinTools), "agent", agentName)
}
if len(userTools) > 0 {
jobOptions = append(jobOptions, coreTypes.WithUserTools(userTools))
xlog.Debug("Adding user tools to job", "count", len(userTools), "agent", agentName)
}
}
res := a.Ask(jobOptions...)
if res.Error != nil { if res.Error != nil {
xlog.Error("Error asking agent", "agent", agentName, "error", res.Error) xlog.Error("Error asking agent", "agent", agentName, "error", res.Error)

View File

@@ -7,12 +7,108 @@ import (
"github.com/sashabaranov/go-openai" "github.com/sashabaranov/go-openai"
) )
// Input holds a value that is either a single string or a list of Message
// items. Both fields are excluded from the default JSON encoding (`json:"-"`);
// the type's own UnmarshalJSON/MarshalJSON methods handle the wire format.
type Input struct {
	// Text is non-nil when the input was a plain string.
	Text *string `json:"-"`
	// Messages is non-nil when the input was a list of messages.
	Messages *[]Message `json:"-"`
}
// UnmarshalJSON implements json.Unmarshaler for Input.
//
// Accepted encodings:
//   - a JSON string, stored in Text (Messages is cleared);
//   - a JSON array of Message objects, stored in Messages (Text is cleared);
//   - JSON null, which is a no-op, following the encoding/json convention
//     for Unmarshalers.
//
// Any other value (object, number, boolean, or an array whose elements do not
// decode as Message) is rejected with the error produced while decoding it as
// []Message, so the caller sees the actual cause rather than a silent success
// or an unrelated type error.
func (i *Input) UnmarshalJSON(data []byte) error {
	if string(data) == "null" {
		return nil
	}

	// Try the string form first.
	var text string
	if err := json.Unmarshal(data, &text); err == nil {
		i.Text, i.Messages = &text, nil
		return nil
	}

	// Otherwise it must be a list of messages.
	var messages []Message
	if err := json.Unmarshal(data, &messages); err != nil {
		return err
	}
	i.Text, i.Messages = nil, &messages
	return nil
}
// MarshalJSON implements json.Marshaler for Input.
//
// It encodes Text as a JSON string when set, otherwise Messages as a JSON
// array when set, otherwise JSON null. If both fields are set, Text wins.
//
// The receiver is a value on purpose: encoding/json only calls a
// pointer-receiver MarshalJSON when the value is addressable, so an Input held
// by value inside a struct that is itself marshaled by value would otherwise
// be encoded as "{}" (both fields are tagged `json:"-"`). A value receiver
// puts the method in the method set of both Input and *Input.
func (i Input) MarshalJSON() ([]byte, error) {
	switch {
	case i.Text != nil:
		return json.Marshal(*i.Text)
	case i.Messages != nil:
		return json.Marshal(*i.Messages)
	default:
		return json.Marshal(nil)
	}
}
// IsText reports whether the input holds the string form, i.e. Text is
// non-nil. An empty string still counts as text.
func (i *Input) IsText() bool {
	return i.Text != nil
}
// IsMessages reports whether the input holds the message-list form, i.e.
// Messages is non-nil. An empty list still counts.
func (i *Input) IsMessages() bool {
	return i.Messages != nil
}
// GetText returns the string held in Text, or "" when Text is nil.
func (i *Input) GetText() string {
	if i.Text == nil {
		return ""
	}
	return *i.Text
}
// GetMessages returns the slice held in Messages, or nil when Messages is
// nil. The returned slice is not a copy.
func (i *Input) GetMessages() []Message {
	if i.Messages == nil {
		return nil
	}
	return *i.Messages
}
// Message is one element of a list-form Input. A single struct covers the
// different item kinds; which optional fields are populated depends on the
// kind of item.
type Message struct {
	// Type is common to all item kinds.
	Type string `json:"type,omitempty"`

	// Role and Content are set for a regular chat message.
	Role    *string  `json:"role,omitempty"`
	Content *Content `json:"content,omitempty"`

	// ID and Status are set for a web search tool call
	// (Type == "web_search_call").
	ID     *string `json:"id,omitempty"`
	Status *string `json:"status,omitempty"`
}
// IsInputMessage reports whether the item is a regular chat message, which is
// decided solely by Role being set.
func (m *Message) IsInputMessage() bool {
	return m.Role != nil
}
// IsWebSearchCall reports whether the item's Type is exactly
// "web_search_call".
func (m *Message) IsWebSearchCall() bool {
	return m.Type == "web_search_call"
}
// ToInputMessage builds an InputMessage from the item's Role and Content.
// It returns nil when the item is not a regular chat message or when either
// Role or Content is missing. Content is copied by value into the result.
func (m *Message) ToInputMessage() *InputMessage {
	if !m.IsInputMessage() {
		return nil
	}
	// Guard both dereferences below.
	if m.Role == nil || m.Content == nil {
		return nil
	}

	msg := new(InputMessage)
	msg.Role = *m.Role
	msg.Content = *m.Content
	return msg
}
// RequestBody represents the request body structure for the OpenAI API // RequestBody represents the request body structure for the OpenAI API
type RequestBody struct { type RequestBody struct {
Model string `json:"model"` Model string `json:"model"`
Input json.RawMessage `json:"input"` Input Input `json:"input"`
InputText string `json:"input_text"`
InputMessages []InputMessage `json:"input_messages"`
Include []string `json:"include,omitempty"` Include []string `json:"include,omitempty"`
Instructions *string `json:"instructions,omitempty"` Instructions *string `json:"instructions,omitempty"`
MaxOutputTokens *int `json:"max_output_tokens,omitempty"` MaxOutputTokens *int `json:"max_output_tokens,omitempty"`
@@ -25,91 +121,78 @@ type RequestBody struct {
Temperature *float64 `json:"temperature,omitempty"` Temperature *float64 `json:"temperature,omitempty"`
Text *TextConfig `json:"text,omitempty"` Text *TextConfig `json:"text,omitempty"`
ToolChoice interface{} `json:"tool_choice,omitempty"` ToolChoice interface{} `json:"tool_choice,omitempty"`
Tools []interface{} `json:"tools,omitempty"` Tools []Tool `json:"tools,omitempty"`
TopP *float64 `json:"top_p,omitempty"` TopP *float64 `json:"top_p,omitempty"`
Truncation *string `json:"truncation,omitempty"` Truncation *string `json:"truncation,omitempty"`
} }
func (r *RequestBody) SetInputByType() { func (r *RequestBody) SetInputByType() {
xlog.Debug("[Parse Request] Set input type", "input", string(r.Input)) // This method is no longer needed as Input handles unmarshaling automatically
if r.Input.IsText() {
var inputText string xlog.Debug("[Parse Request] Set input type as text", "input", r.Input.GetText())
if err := json.Unmarshal(r.Input, &inputText); err == nil { } else if r.Input.IsMessages() {
r.InputText = inputText xlog.Debug("[Parse Request] Input messages parsed", "messages", r.Input.GetMessages())
return
} }
var inputMessages []InputMessage
if err := json.Unmarshal(r.Input, &inputMessages); err != nil {
xlog.Warn("[Parse Request] Input type not recognized", "input", string(r.Input))
return
}
for _, i := range inputMessages {
switch content := i.Content.(type) {
case []ContentItem:
i.ContentItems = content
case string:
i.ContentText = content
default:
xlog.Warn("[Parse Request] Input content type not recognized", "content", content)
}
r.InputMessages = append(r.InputMessages, i)
}
xlog.Debug("[Parse Request] Input messages parsed", "messages", r.InputMessages)
} }
func (r *RequestBody) ToChatCompletionMessages() []openai.ChatCompletionMessage { func (r *RequestBody) ToChatCompletionMessages() []openai.ChatCompletionMessage {
result := []openai.ChatCompletionMessage{} result := []openai.ChatCompletionMessage{}
for _, m := range r.InputMessages { if r.Input.IsMessages() {
content := []openai.ChatMessagePart{} for _, m := range r.Input.GetMessages() {
oneImageWasFound := false // Only process regular input messages, skip web search calls and other types
if !m.IsInputMessage() {
continue
}
if m.ContentText != "" { content := []openai.ChatMessagePart{}
content = append(content, openai.ChatMessagePart{ oneImageWasFound := false
Type: "text",
Text: m.ContentText,
})
}
for _, c := range m.ContentItems { if m.Content != nil && m.Content.IsText() && m.Content.GetText() != "" {
switch c.Type {
case "text":
content = append(content, openai.ChatMessagePart{ content = append(content, openai.ChatMessagePart{
Type: "text", Type: "text",
Text: c.Text, Text: m.Content.GetText(),
})
case "image":
oneImageWasFound = true
content = append(content, openai.ChatMessagePart{
Type: "image",
ImageURL: &openai.ChatMessageImageURL{URL: c.ImageURL},
}) })
} }
}
if oneImageWasFound { if m.Content != nil && m.Content.IsItems() {
result = append(result, openai.ChatCompletionMessage{ for _, c := range m.Content.GetItems() {
Role: m.Role, switch c.Type {
MultiContent: content, case "text":
}) content = append(content, openai.ChatMessagePart{
} else { Type: "text",
for _, c := range content { Text: c.Text,
})
case "image":
oneImageWasFound = true
content = append(content, openai.ChatMessagePart{
Type: "image",
ImageURL: &openai.ChatMessageImageURL{URL: c.ImageURL},
})
}
}
}
if oneImageWasFound {
result = append(result, openai.ChatCompletionMessage{ result = append(result, openai.ChatCompletionMessage{
Role: m.Role, Role: *m.Role,
Content: c.Text, MultiContent: content,
}) })
} else {
for _, c := range content {
result = append(result, openai.ChatCompletionMessage{
Role: *m.Role,
Content: c.Text,
})
}
} }
} }
} }
if r.InputText != "" { if r.Input.IsText() && r.Input.GetText() != "" {
result = append(result, openai.ChatCompletionMessage{ result = append(result, openai.ChatCompletionMessage{
Role: "user", Role: "user",
Content: r.InputText, Content: r.Input.GetText(),
}) })
} }
@@ -182,7 +265,7 @@ type ResponseBody struct {
Temperature float64 `json:"temperature"` Temperature float64 `json:"temperature"`
Text TextConfig `json:"text"` Text TextConfig `json:"text"`
ToolChoice string `json:"tool_choice"` ToolChoice string `json:"tool_choice"`
Tools []interface{} `json:"tools"` Tools []Tool `json:"tools"`
TopP float64 `json:"top_p"` TopP float64 `json:"top_p"`
Truncation string `json:"truncation"` Truncation string `json:"truncation"`
Usage UsageInfo `json:"usage"` Usage UsageInfo `json:"usage"`
@@ -190,12 +273,72 @@ type ResponseBody struct {
Metadata map[string]interface{} `json:"metadata"` Metadata map[string]interface{} `json:"metadata"`
} }
// Content holds a message body that is either a single string or a list of
// ContentItem values. Both fields are excluded from the default JSON encoding
// (`json:"-"`); the type's own UnmarshalJSON/MarshalJSON methods handle the
// wire format.
type Content struct {
	// Text is non-nil when the content was a plain string.
	Text *string `json:"-"`
	// Items is non-nil when the content was a list of items.
	Items *[]ContentItem `json:"-"`
}
// UnmarshalJSON implements json.Unmarshaler for Content.
//
// Accepted encodings:
//   - a JSON string, stored in Text (Items is cleared);
//   - a JSON array of ContentItem objects, stored in Items (Text is cleared);
//   - JSON null, which is a no-op, following the encoding/json convention
//     for Unmarshalers.
//
// Any other value (object, number, boolean, or an array whose elements do not
// decode as ContentItem) is rejected with the error produced while decoding
// it as []ContentItem, so the caller sees the actual cause rather than a
// silent success or an unrelated type error.
func (c *Content) UnmarshalJSON(data []byte) error {
	if string(data) == "null" {
		return nil
	}

	// Try the string form first.
	var text string
	if err := json.Unmarshal(data, &text); err == nil {
		c.Text, c.Items = &text, nil
		return nil
	}

	// Otherwise it must be a list of content items.
	var items []ContentItem
	if err := json.Unmarshal(data, &items); err != nil {
		return err
	}
	c.Text, c.Items = nil, &items
	return nil
}
// MarshalJSON implements json.Marshaler for Content.
//
// It encodes Text as a JSON string when set, otherwise Items as a JSON array
// when set, otherwise JSON null. If both fields are set, Text wins.
//
// The receiver is a value on purpose: encoding/json only calls a
// pointer-receiver MarshalJSON when the value is addressable, so a Content
// held by value inside a struct that is itself marshaled by value would
// otherwise be encoded as "{}" (both fields are tagged `json:"-"`). A value
// receiver puts the method in the method set of both Content and *Content.
func (c Content) MarshalJSON() ([]byte, error) {
	switch {
	case c.Text != nil:
		return json.Marshal(*c.Text)
	case c.Items != nil:
		return json.Marshal(*c.Items)
	default:
		return json.Marshal(nil)
	}
}
// IsText reports whether the content holds the string form, i.e. Text is
// non-nil. An empty string still counts as text.
func (c *Content) IsText() bool {
	return c.Text != nil
}
// IsItems reports whether the content holds the item-list form, i.e. Items is
// non-nil. An empty list still counts.
func (c *Content) IsItems() bool {
	return c.Items != nil
}
// GetText returns the string held in Text, or "" when Text is nil.
func (c *Content) GetText() string {
	if c.Text == nil {
		return ""
	}
	return *c.Text
}
// GetItems returns the slice held in Items, or nil when Items is nil. The
// returned slice is not a copy.
func (c *Content) GetItems() []ContentItem {
	if c.Items == nil {
		return nil
	}
	return *c.Items
}
// InputMessage represents a user input message // InputMessage represents a user input message
type InputMessage struct { type InputMessage struct {
Role string `json:"role"` Role string `json:"role"`
Content any `json:"content"` Content Content `json:"content"`
ContentText string `json:"content_text"`
ContentItems []ContentItem `json:"content_items"`
} }
// ContentItem represents an item in a content array // ContentItem represents an item in a content array
@@ -204,3 +347,169 @@ type ContentItem struct {
Text string `json:"text,omitempty"` Text string `json:"text,omitempty"`
ImageURL string `json:"image_url,omitempty"` ImageURL string `json:"image_url,omitempty"`
} }
// Tool describes one entry of a request's "tools" list. A single struct
// covers the supported tool kinds; Type selects which optional fields apply.
type Tool struct {
	Type string `json:"type"`

	// Fields used when Type == "function".
	Name        *string     `json:"name,omitempty"`
	Description *string     `json:"description,omitempty"`
	Parameters  *JSONSchema `json:"parameters,omitempty"`
	Strict      *bool       `json:"strict,omitempty"`

	// Fields used by web search tools (Type == "web_search_preview" etc.).
	SearchContextSize *string       `json:"search_context_size,omitempty"`
	UserLocation      *UserLocation `json:"user_location,omitempty"`
}
// IsFunction reports whether the tool's Type is exactly "function".
func (t *Tool) IsFunction() bool {
	return t.Type == "function"
}
// IsWebSearch reports whether the tool's Type is one of the recognised web
// search identifiers: "web_search_preview" or "web_search_preview_2025_03_11".
func (t *Tool) IsWebSearch() bool {
	switch t.Type {
	case "web_search_preview", "web_search_preview_2025_03_11":
		return true
	default:
		return false
	}
}
// ToCompletionFunction converts the tool into an openai.FunctionDefinition
// for use with the completions API.
//
//   - A function tool with a Name yields a definition carrying that name, the
//     Description (empty when unset) and the Parameters schema (left unset
//     when the tool has none).
//   - A web search tool yields a synthetic function named
//     "web_search_" + Type with a fixed description and a fixed parameter
//     schema describing search_context_size and user_location.
//   - Anything else, including a function tool without a Name, yields nil.
func (t *Tool) ToCompletionFunction() *openai.FunctionDefinition {
	switch {
	case t.IsFunction() && t.Name != nil:
		def := &openai.FunctionDefinition{Name: *t.Name}
		if t.Description != nil {
			def.Description = *t.Description
		}
		// Only assign a non-nil schema so that Parameters is a plain nil
		// interface (not one wrapping a nil pointer) when none was given.
		if t.Parameters != nil {
			def.Parameters = t.Parameters
		}
		return def

	case t.IsWebSearch():
		// stringProp builds the schema of a plain described string property.
		stringProp := func(description string) map[string]interface{} {
			return map[string]interface{}{
				"type":        "string",
				"description": description,
			}
		}

		contextSize := stringProp("Amount of context window space to use for search")
		contextSize["enum"] = []string{"low", "medium", "high"}

		locationType := stringProp("Type of location approximation")
		locationType["const"] = "approximate"

		userLocation := map[string]interface{}{
			"type": "object",
			"properties": map[string]interface{}{
				"type":     locationType,
				"city":     stringProp("City of the user"),
				"country":  stringProp("Two-letter ISO country code"),
				"region":   stringProp("Region of the user"),
				"timezone": stringProp("IANA timezone of the user"),
			},
		}

		return &openai.FunctionDefinition{
			Name:        "web_search_" + t.Type,
			Description: "Web search tool for finding relevant information online",
			Parameters: map[string]interface{}{
				"type": "object",
				"properties": map[string]interface{}{
					"search_context_size": contextSize,
					"user_location":       userLocation,
				},
			},
		}
	}

	return nil
}
// ToCompletionTools converts tools into openai.Tool values of type
// openai.ToolTypeFunction, in input order. Entries for which
// ToCompletionFunction returns nil are left out. The result is always
// non-nil, even when no tool could be converted.
func ToCompletionTools(tools []Tool) []openai.Tool {
	converted := make([]openai.Tool, 0, len(tools))
	for idx := range tools {
		def := tools[idx].ToCompletionFunction()
		if def == nil {
			continue
		}
		converted = append(converted, openai.Tool{
			Type:     openai.ToolTypeFunction,
			Function: def,
		})
	}
	return converted
}
// SeparateTools splits tools into built-in tools (web search) and
// user-defined function tools, converting each to an openai.Tool of type
// openai.ToolTypeFunction. Relative order is kept within each group. Tools of
// any other type, and tools for which ToCompletionFunction returns nil, are
// dropped. Either result is nil when its group is empty.
func SeparateTools(tools []Tool) (builtinTools []openai.Tool, userTools []openai.Tool) {
	for idx := range tools {
		current := &tools[idx]

		// Pick the destination group first; the conversion is the same for both.
		var group *[]openai.Tool
		switch {
		case current.IsFunction():
			group = &userTools
		case current.IsWebSearch():
			group = &builtinTools
		default:
			continue
		}

		if def := current.ToCompletionFunction(); def != nil {
			*group = append(*group, openai.Tool{
				Type:     openai.ToolTypeFunction,
				Function: def,
			})
		}
	}
	return builtinTools, userTools
}
// JSONSchema models the subset of JSON Schema keywords used to describe
// function parameters. Every field is optional and omitted from the JSON
// encoding when empty; pointer fields distinguish "unset" from a zero value.
type JSONSchema struct {
	Type                 string                 `json:"type,omitempty"`
	Properties           map[string]*JSONSchema `json:"properties,omitempty"`
	Required             []string               `json:"required,omitempty"`
	Items                *JSONSchema            `json:"items,omitempty"`
	AdditionalProperties *bool                  `json:"additionalProperties,omitempty"`
	Description          string                 `json:"description,omitempty"`
	Enum                 []interface{}          `json:"enum,omitempty"`
	Format               string                 `json:"format,omitempty"`
	Minimum              *float64               `json:"minimum,omitempty"`
	Maximum              *float64               `json:"maximum,omitempty"`
	MinLength            *int                   `json:"minLength,omitempty"`
	MaxLength            *int                   `json:"maxLength,omitempty"`
}
// UserLocation carries the user's location for a web search tool. Type is
// always encoded; the remaining fields are optional and omitted from the JSON
// encoding when nil.
type UserLocation struct {
	Type     string  `json:"type"`
	City     *string `json:"city,omitempty"`
	Country  *string `json:"country,omitempty"`
	Region   *string `json:"region,omitempty"`
	Timezone *string `json:"timezone,omitempty"`
}