feat(api): Handle tool calls in responses API
Signed-off-by: Richard Palethorpe <io@richiejp.com>
This commit is contained in:
@@ -20,6 +20,10 @@ type Job struct {
|
||||
UUID string
|
||||
Metadata map[string]interface{}
|
||||
DoneFilter bool
|
||||
|
||||
// Tools available for this job
|
||||
BuiltinTools []openai.Tool // Built-in tools like web search
|
||||
UserTools []openai.Tool // User-defined function tools
|
||||
|
||||
pastActions []*ActionRequest
|
||||
nextAction *Action
|
||||
@@ -45,6 +49,18 @@ func WithConversationHistory(history []openai.ChatCompletionMessage) JobOption {
|
||||
}
|
||||
}
|
||||
|
||||
func WithBuiltinTools(tools []openai.Tool) JobOption {
|
||||
return func(j *Job) {
|
||||
j.BuiltinTools = tools
|
||||
}
|
||||
}
|
||||
|
||||
func WithUserTools(tools []openai.Tool) JobOption {
|
||||
return func(j *Job) {
|
||||
j.UserTools = tools
|
||||
}
|
||||
}
|
||||
|
||||
func WithReasoningCallback(f func(ActionCurrentState) bool) JobOption {
|
||||
return func(r *Job) {
|
||||
r.ReasoningCallback = f
|
||||
@@ -227,3 +243,21 @@ func (j *Job) IncrementEvaluationLoop() {
|
||||
currentLoop := j.GetEvaluationLoop()
|
||||
j.Metadata["evaluation_loop"] = currentLoop + 1
|
||||
}
|
||||
|
||||
// GetBuiltinTools returns the builtin tools for this job
|
||||
func (j *Job) GetBuiltinTools() []openai.Tool {
|
||||
return j.BuiltinTools
|
||||
}
|
||||
|
||||
// GetUserTools returns the user tools for this job
|
||||
func (j *Job) GetUserTools() []openai.Tool {
|
||||
return j.UserTools
|
||||
}
|
||||
|
||||
// GetAllTools returns all tools (builtin + user) for this job
|
||||
func (j *Job) GetAllTools() []openai.Tool {
|
||||
allTools := make([]openai.Tool, 0, len(j.BuiltinTools)+len(j.UserTools))
|
||||
allTools = append(allTools, j.BuiltinTools...)
|
||||
allTools = append(allTools, j.UserTools...)
|
||||
return allTools
|
||||
}
|
||||
|
||||
20
webui/app.go
20
webui/app.go
@@ -513,9 +513,25 @@ func (a *App) Responses(pool *state.AgentPool, tracker *conversations.Conversati
|
||||
return c.Status(http.StatusInternalServerError).JSON(types.ResponseBody{Error: "Agent not found"})
|
||||
}
|
||||
|
||||
res := a.Ask(
|
||||
// Prepare job options
|
||||
jobOptions := []coreTypes.JobOption{
|
||||
coreTypes.WithConversationHistory(messages),
|
||||
)
|
||||
}
|
||||
|
||||
// Add tools if present in the request
|
||||
if len(request.Tools) > 0 {
|
||||
builtinTools, userTools := types.SeparateTools(request.Tools)
|
||||
if len(builtinTools) > 0 {
|
||||
jobOptions = append(jobOptions, coreTypes.WithBuiltinTools(builtinTools))
|
||||
xlog.Debug("Adding builtin tools to job", "count", len(builtinTools), "agent", agentName)
|
||||
}
|
||||
if len(userTools) > 0 {
|
||||
jobOptions = append(jobOptions, coreTypes.WithUserTools(userTools))
|
||||
xlog.Debug("Adding user tools to job", "count", len(userTools), "agent", agentName)
|
||||
}
|
||||
}
|
||||
|
||||
res := a.Ask(jobOptions...)
|
||||
if res.Error != nil {
|
||||
xlog.Error("Error asking agent", "agent", agentName, "error", res.Error)
|
||||
|
||||
|
||||
@@ -7,12 +7,108 @@ import (
|
||||
"github.com/sashabaranov/go-openai"
|
||||
)
|
||||
|
||||
// Input represents either a string or a slice of Message
|
||||
type Input struct {
|
||||
Text *string `json:"-"`
|
||||
Messages *[]Message `json:"-"`
|
||||
}
|
||||
|
||||
// UnmarshalJSON implements custom JSON unmarshaling for Input
|
||||
func (i *Input) UnmarshalJSON(data []byte) error {
|
||||
// Try to unmarshal as string first
|
||||
var text string
|
||||
if err := json.Unmarshal(data, &text); err == nil {
|
||||
i.Text = &text
|
||||
return nil
|
||||
}
|
||||
|
||||
// Try to unmarshal as []Message
|
||||
var messages []Message
|
||||
if err := json.Unmarshal(data, &messages); err == nil {
|
||||
i.Messages = &messages
|
||||
return nil
|
||||
}
|
||||
|
||||
return json.Unmarshal(data, &struct{}{}) // fallback to empty struct
|
||||
}
|
||||
|
||||
// MarshalJSON implements custom JSON marshaling for Input
|
||||
func (i *Input) MarshalJSON() ([]byte, error) {
|
||||
if i.Text != nil {
|
||||
return json.Marshal(*i.Text)
|
||||
}
|
||||
if i.Messages != nil {
|
||||
return json.Marshal(*i.Messages)
|
||||
}
|
||||
return json.Marshal(nil)
|
||||
}
|
||||
|
||||
// IsText returns true if the input contains text
|
||||
func (i *Input) IsText() bool {
|
||||
return i.Text != nil
|
||||
}
|
||||
|
||||
// IsMessages returns true if the input contains messages
|
||||
func (i *Input) IsMessages() bool {
|
||||
return i.Messages != nil
|
||||
}
|
||||
|
||||
// GetText returns the text value or empty string
|
||||
func (i *Input) GetText() string {
|
||||
if i.Text != nil {
|
||||
return *i.Text
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// GetMessages returns the messages value or empty slice
|
||||
func (i *Input) GetMessages() []Message {
|
||||
if i.Messages != nil {
|
||||
return *i.Messages
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Message represents different types of messages in the input
|
||||
type Message struct {
|
||||
// Common fields
|
||||
Type string `json:"type,omitempty"`
|
||||
|
||||
// InputMessage fields (when this is a regular chat message)
|
||||
Role *string `json:"role,omitempty"`
|
||||
Content *Content `json:"content,omitempty"`
|
||||
|
||||
// WebSearchToolCall fields (when type == "web_search_call")
|
||||
ID *string `json:"id,omitempty"`
|
||||
Status *string `json:"status,omitempty"`
|
||||
}
|
||||
|
||||
// IsInputMessage returns true if this is a regular chat message
|
||||
func (m *Message) IsInputMessage() bool {
|
||||
return m.Role != nil
|
||||
}
|
||||
|
||||
// IsWebSearchCall returns true if this is a web search tool call
|
||||
func (m *Message) IsWebSearchCall() bool {
|
||||
return m.Type == "web_search_call"
|
||||
}
|
||||
|
||||
// ToInputMessage converts to InputMessage if this is a regular message
|
||||
func (m *Message) ToInputMessage() *InputMessage {
|
||||
if m.IsInputMessage() && m.Role != nil && m.Content != nil {
|
||||
content := *m.Content
|
||||
return &InputMessage{
|
||||
Role: *m.Role,
|
||||
Content: content,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// RequestBody represents the request body structure for the OpenAI API
|
||||
type RequestBody struct {
|
||||
Model string `json:"model"`
|
||||
Input json.RawMessage `json:"input"`
|
||||
InputText string `json:"input_text"`
|
||||
InputMessages []InputMessage `json:"input_messages"`
|
||||
Input Input `json:"input"`
|
||||
Include []string `json:"include,omitempty"`
|
||||
Instructions *string `json:"instructions,omitempty"`
|
||||
MaxOutputTokens *int `json:"max_output_tokens,omitempty"`
|
||||
@@ -25,91 +121,78 @@ type RequestBody struct {
|
||||
Temperature *float64 `json:"temperature,omitempty"`
|
||||
Text *TextConfig `json:"text,omitempty"`
|
||||
ToolChoice interface{} `json:"tool_choice,omitempty"`
|
||||
Tools []interface{} `json:"tools,omitempty"`
|
||||
Tools []Tool `json:"tools,omitempty"`
|
||||
TopP *float64 `json:"top_p,omitempty"`
|
||||
Truncation *string `json:"truncation,omitempty"`
|
||||
}
|
||||
|
||||
func (r *RequestBody) SetInputByType() {
|
||||
xlog.Debug("[Parse Request] Set input type", "input", string(r.Input))
|
||||
|
||||
var inputText string
|
||||
if err := json.Unmarshal(r.Input, &inputText); err == nil {
|
||||
r.InputText = inputText
|
||||
return
|
||||
// This method is no longer needed as Input handles unmarshaling automatically
|
||||
if r.Input.IsText() {
|
||||
xlog.Debug("[Parse Request] Set input type as text", "input", r.Input.GetText())
|
||||
} else if r.Input.IsMessages() {
|
||||
xlog.Debug("[Parse Request] Input messages parsed", "messages", r.Input.GetMessages())
|
||||
}
|
||||
|
||||
var inputMessages []InputMessage
|
||||
if err := json.Unmarshal(r.Input, &inputMessages); err != nil {
|
||||
xlog.Warn("[Parse Request] Input type not recognized", "input", string(r.Input))
|
||||
return
|
||||
}
|
||||
|
||||
for _, i := range inputMessages {
|
||||
switch content := i.Content.(type) {
|
||||
case []ContentItem:
|
||||
i.ContentItems = content
|
||||
case string:
|
||||
i.ContentText = content
|
||||
default:
|
||||
xlog.Warn("[Parse Request] Input content type not recognized", "content", content)
|
||||
}
|
||||
|
||||
r.InputMessages = append(r.InputMessages, i)
|
||||
}
|
||||
|
||||
xlog.Debug("[Parse Request] Input messages parsed", "messages", r.InputMessages)
|
||||
}
|
||||
|
||||
func (r *RequestBody) ToChatCompletionMessages() []openai.ChatCompletionMessage {
|
||||
result := []openai.ChatCompletionMessage{}
|
||||
|
||||
for _, m := range r.InputMessages {
|
||||
content := []openai.ChatMessagePart{}
|
||||
oneImageWasFound := false
|
||||
if r.Input.IsMessages() {
|
||||
for _, m := range r.Input.GetMessages() {
|
||||
// Only process regular input messages, skip web search calls and other types
|
||||
if !m.IsInputMessage() {
|
||||
continue
|
||||
}
|
||||
|
||||
if m.ContentText != "" {
|
||||
content = append(content, openai.ChatMessagePart{
|
||||
Type: "text",
|
||||
Text: m.ContentText,
|
||||
})
|
||||
}
|
||||
content := []openai.ChatMessagePart{}
|
||||
oneImageWasFound := false
|
||||
|
||||
for _, c := range m.ContentItems {
|
||||
switch c.Type {
|
||||
case "text":
|
||||
if m.Content != nil && m.Content.IsText() && m.Content.GetText() != "" {
|
||||
content = append(content, openai.ChatMessagePart{
|
||||
Type: "text",
|
||||
Text: c.Text,
|
||||
})
|
||||
case "image":
|
||||
oneImageWasFound = true
|
||||
content = append(content, openai.ChatMessagePart{
|
||||
Type: "image",
|
||||
ImageURL: &openai.ChatMessageImageURL{URL: c.ImageURL},
|
||||
Text: m.Content.GetText(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if oneImageWasFound {
|
||||
result = append(result, openai.ChatCompletionMessage{
|
||||
Role: m.Role,
|
||||
MultiContent: content,
|
||||
})
|
||||
} else {
|
||||
for _, c := range content {
|
||||
if m.Content != nil && m.Content.IsItems() {
|
||||
for _, c := range m.Content.GetItems() {
|
||||
switch c.Type {
|
||||
case "text":
|
||||
content = append(content, openai.ChatMessagePart{
|
||||
Type: "text",
|
||||
Text: c.Text,
|
||||
})
|
||||
case "image":
|
||||
oneImageWasFound = true
|
||||
content = append(content, openai.ChatMessagePart{
|
||||
Type: "image",
|
||||
ImageURL: &openai.ChatMessageImageURL{URL: c.ImageURL},
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if oneImageWasFound {
|
||||
result = append(result, openai.ChatCompletionMessage{
|
||||
Role: m.Role,
|
||||
Content: c.Text,
|
||||
Role: *m.Role,
|
||||
MultiContent: content,
|
||||
})
|
||||
} else {
|
||||
for _, c := range content {
|
||||
result = append(result, openai.ChatCompletionMessage{
|
||||
Role: *m.Role,
|
||||
Content: c.Text,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if r.InputText != "" {
|
||||
if r.Input.IsText() && r.Input.GetText() != "" {
|
||||
result = append(result, openai.ChatCompletionMessage{
|
||||
Role: "user",
|
||||
Content: r.InputText,
|
||||
Content: r.Input.GetText(),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -182,7 +265,7 @@ type ResponseBody struct {
|
||||
Temperature float64 `json:"temperature"`
|
||||
Text TextConfig `json:"text"`
|
||||
ToolChoice string `json:"tool_choice"`
|
||||
Tools []interface{} `json:"tools"`
|
||||
Tools []Tool `json:"tools"`
|
||||
TopP float64 `json:"top_p"`
|
||||
Truncation string `json:"truncation"`
|
||||
Usage UsageInfo `json:"usage"`
|
||||
@@ -190,12 +273,72 @@ type ResponseBody struct {
|
||||
Metadata map[string]interface{} `json:"metadata"`
|
||||
}
|
||||
|
||||
// Content represents either a string or a slice of ContentItem
|
||||
type Content struct {
|
||||
Text *string `json:"-"`
|
||||
Items *[]ContentItem `json:"-"`
|
||||
}
|
||||
|
||||
// UnmarshalJSON implements custom JSON unmarshaling for Content
|
||||
func (c *Content) UnmarshalJSON(data []byte) error {
|
||||
// Try to unmarshal as string first
|
||||
var text string
|
||||
if err := json.Unmarshal(data, &text); err == nil {
|
||||
c.Text = &text
|
||||
return nil
|
||||
}
|
||||
|
||||
// Try to unmarshal as []ContentItem
|
||||
var items []ContentItem
|
||||
if err := json.Unmarshal(data, &items); err == nil {
|
||||
c.Items = &items
|
||||
return nil
|
||||
}
|
||||
|
||||
return json.Unmarshal(data, &struct{}{}) // fallback to empty struct
|
||||
}
|
||||
|
||||
// MarshalJSON implements custom JSON marshaling for Content
|
||||
func (c *Content) MarshalJSON() ([]byte, error) {
|
||||
if c.Text != nil {
|
||||
return json.Marshal(*c.Text)
|
||||
}
|
||||
if c.Items != nil {
|
||||
return json.Marshal(*c.Items)
|
||||
}
|
||||
return json.Marshal(nil)
|
||||
}
|
||||
|
||||
// IsText returns true if the content contains text
|
||||
func (c *Content) IsText() bool {
|
||||
return c.Text != nil
|
||||
}
|
||||
|
||||
// IsItems returns true if the content contains items
|
||||
func (c *Content) IsItems() bool {
|
||||
return c.Items != nil
|
||||
}
|
||||
|
||||
// GetText returns the text value or empty string
|
||||
func (c *Content) GetText() string {
|
||||
if c.Text != nil {
|
||||
return *c.Text
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// GetItems returns the items value or empty slice
|
||||
func (c *Content) GetItems() []ContentItem {
|
||||
if c.Items != nil {
|
||||
return *c.Items
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// InputMessage represents a user input message
|
||||
type InputMessage struct {
|
||||
Role string `json:"role"`
|
||||
Content any `json:"content"`
|
||||
ContentText string `json:"content_text"`
|
||||
ContentItems []ContentItem `json:"content_items"`
|
||||
Role string `json:"role"`
|
||||
Content Content `json:"content"`
|
||||
}
|
||||
|
||||
// ContentItem represents an item in a content array
|
||||
@@ -204,3 +347,169 @@ type ContentItem struct {
|
||||
Text string `json:"text,omitempty"`
|
||||
ImageURL string `json:"image_url,omitempty"`
|
||||
}
|
||||
|
||||
// Tool represents a tool that can be called by the assistant
|
||||
type Tool struct {
|
||||
Type string `json:"type"`
|
||||
|
||||
// Function tool fields (used when type == "function")
|
||||
Name *string `json:"name,omitempty"`
|
||||
Description *string `json:"description,omitempty"`
|
||||
Parameters *JSONSchema `json:"parameters,omitempty"`
|
||||
Strict *bool `json:"strict,omitempty"`
|
||||
|
||||
// Web search tool fields (used when type == "web_search_preview" etc.)
|
||||
SearchContextSize *string `json:"search_context_size,omitempty"`
|
||||
UserLocation *UserLocation `json:"user_location,omitempty"`
|
||||
}
|
||||
|
||||
// IsFunction returns true if this is a function tool
|
||||
func (t *Tool) IsFunction() bool {
|
||||
return t.Type == "function"
|
||||
}
|
||||
|
||||
// IsWebSearch returns true if this is a web search tool
|
||||
func (t *Tool) IsWebSearch() bool {
|
||||
return t.Type == "web_search_preview" || t.Type == "web_search_preview_2025_03_11"
|
||||
}
|
||||
|
||||
// ToCompletionFunction converts this tool to a function definition for the completions API
|
||||
func (t *Tool) ToCompletionFunction() *openai.FunctionDefinition {
|
||||
if t.IsFunction() && t.Name != nil {
|
||||
// Regular function tool
|
||||
var params interface{}
|
||||
if t.Parameters != nil {
|
||||
params = t.Parameters
|
||||
}
|
||||
|
||||
desc := ""
|
||||
if t.Description != nil {
|
||||
desc = *t.Description
|
||||
}
|
||||
|
||||
return &openai.FunctionDefinition{
|
||||
Name: *t.Name,
|
||||
Description: desc,
|
||||
Parameters: params,
|
||||
}
|
||||
}
|
||||
|
||||
if t.IsWebSearch() {
|
||||
// Convert web search builtin to function
|
||||
name := "web_search_" + t.Type
|
||||
desc := "Web search tool for finding relevant information online"
|
||||
|
||||
// Create parameters schema for web search options
|
||||
params := map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"search_context_size": map[string]interface{}{
|
||||
"type": "string",
|
||||
"enum": []string{"low", "medium", "high"},
|
||||
"description": "Amount of context window space to use for search",
|
||||
},
|
||||
"user_location": map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"type": map[string]interface{}{
|
||||
"type": "string",
|
||||
"const": "approximate",
|
||||
"description": "Type of location approximation",
|
||||
},
|
||||
"city": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "City of the user",
|
||||
},
|
||||
"country": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "Two-letter ISO country code",
|
||||
},
|
||||
"region": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "Region of the user",
|
||||
},
|
||||
"timezone": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "IANA timezone of the user",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
return &openai.FunctionDefinition{
|
||||
Name: name,
|
||||
Description: desc,
|
||||
Parameters: params,
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ToCompletionTools converts a slice of Tools to openai.Tool format for completions API
|
||||
func ToCompletionTools(tools []Tool) []openai.Tool {
|
||||
result := make([]openai.Tool, 0, len(tools))
|
||||
|
||||
for _, tool := range tools {
|
||||
if fn := tool.ToCompletionFunction(); fn != nil {
|
||||
result = append(result, openai.Tool{
|
||||
Type: openai.ToolTypeFunction,
|
||||
Function: fn,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// SeparateTools separates a slice of Tools into builtin tools and user tools
|
||||
func SeparateTools(tools []Tool) (builtinTools []openai.Tool, userTools []openai.Tool) {
|
||||
for _, tool := range tools {
|
||||
if tool.IsFunction() {
|
||||
// User-defined function tool
|
||||
if fn := tool.ToCompletionFunction(); fn != nil {
|
||||
userTools = append(userTools, openai.Tool{
|
||||
Type: openai.ToolTypeFunction,
|
||||
Function: fn,
|
||||
})
|
||||
}
|
||||
} else if tool.IsWebSearch() {
|
||||
// Builtin tool (web search)
|
||||
if fn := tool.ToCompletionFunction(); fn != nil {
|
||||
builtinTools = append(builtinTools, openai.Tool{
|
||||
Type: openai.ToolTypeFunction,
|
||||
Function: fn,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
return builtinTools, userTools
|
||||
}
|
||||
|
||||
// JSONSchema models the subset of JSON Schema used to describe function
// parameters. Every field is optional and omitted from the encoding when
// empty.
type JSONSchema struct {
	Type        string `json:"type,omitempty"`
	Description string `json:"description,omitempty"`

	// Object keywords. Note the JSON names below follow JSON Schema's
	// camelCase spelling.
	Properties           map[string]*JSONSchema `json:"properties,omitempty"`
	Required             []string               `json:"required,omitempty"`
	AdditionalProperties *bool                  `json:"additionalProperties,omitempty"`

	// Array keyword.
	Items *JSONSchema `json:"items,omitempty"`

	// Value constraints. Pointers distinguish "unset" from zero.
	Enum      []interface{} `json:"enum,omitempty"`
	Format    string        `json:"format,omitempty"`
	Minimum   *float64      `json:"minimum,omitempty"`
	Maximum   *float64      `json:"maximum,omitempty"`
	MinLength *int          `json:"minLength,omitempty"`
	MaxLength *int          `json:"maxLength,omitempty"`
}
|
||||
|
||||
// UserLocation carries the user's location for a web search tool. Type is
// always encoded; the remaining fields are optional and omitted when nil.
type UserLocation struct {
	Type string `json:"type"`

	City     *string `json:"city,omitempty"`
	Country  *string `json:"country,omitempty"`
	Region   *string `json:"region,omitempty"`
	Timezone *string `json:"timezone,omitempty"`
}
|
||||
|
||||
Reference in New Issue
Block a user