feat: add deep research action (#91)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2025-04-29 08:46:55 +02:00
committed by GitHub
parent f3884c0244
commit 29f7644577
4 changed files with 244 additions and 28 deletions

View File

@@ -66,6 +66,7 @@ func main() {
localRAG, localRAG,
services.Actions(map[string]string{ services.Actions(map[string]string{
"browser-agent-runner-base-url": localOperatorBaseURL, "browser-agent-runner-base-url": localOperatorBaseURL,
"deep-research-runner-base-url": localOperatorBaseURL,
}), }),
services.Connectors, services.Connectors,
services.DynamicPrompts, services.DynamicPrompts,

View File

@@ -4,69 +4,146 @@ import (
"bytes" "bytes"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io"
"net/http" "net/http"
"time"
) )
// Client represents a client for interacting with the LocalOperator API
type Client struct { type Client struct {
baseURL string baseURL string
httpClient *http.Client httpClient *http.Client
} }
// NewClient creates a new API client func NewClient(baseURL string, timeout ...time.Duration) *Client {
func NewClient(baseURL string) *Client { defaultTimeout := 30 * time.Second
if len(timeout) > 0 {
defaultTimeout = timeout[0]
}
return &Client{ return &Client{
baseURL: baseURL, baseURL: baseURL,
httpClient: &http.Client{}, httpClient: &http.Client{
Timeout: defaultTimeout,
},
} }
} }
// AgentRequest represents the request body for running an agent
type AgentRequest struct { type AgentRequest struct {
Goal string `json:"goal"` Goal string `json:"goal"`
MaxAttempts int `json:"max_attempts,omitempty"` MaxAttempts int `json:"max_attempts,omitempty"`
MaxNoActionAttempts int `json:"max_no_action_attempts,omitempty"` MaxNoActionAttempts int `json:"max_no_action_attempts,omitempty"`
} }
// StateDescription represents a single state in the agent's history type DesktopAgentRequest struct {
AgentRequest
DesktopURL string `json:"desktop_url"`
}
// DeepResearchRequest is the JSON body that Client.RunDeepResearch posts to
// the deep-research endpoint.
//
// Topic is always serialized. The three integer fields are tagged
// `omitempty`, so a zero value is left out of the payload entirely.
type DeepResearchRequest struct {
	// Topic is serialized as "topic".
	Topic string `json:"topic"`

	// MaxCycles is serialized as "max_cycles" when non-zero.
	MaxCycles int `json:"max_cycles,omitempty"`

	// MaxNoActionAttempts is serialized as "max_no_action_attempts" when
	// non-zero.
	MaxNoActionAttempts int `json:"max_no_action_attempts,omitempty"`

	// MaxResults is serialized as "max_results" when non-zero.
	MaxResults int `json:"max_results,omitempty"`
}
// Response types
type StateDescription struct { type StateDescription struct {
CurrentURL string `json:"current_url"` CurrentURL string `json:"current_url"`
PageTitle string `json:"page_title"` PageTitle string `json:"page_title"`
PageContentDescription string `json:"page_content_description"` PageContentDescription string `json:"page_content_description"`
Screenshot string `json:"screenshot"` Screenshot string `json:"screenshot"`
ScreenshotMimeType string `json:"screenshot_mime_type"` // MIME type of the screenshot (e.g., "image/png") ScreenshotMimeType string `json:"screenshot_mime_type"`
} }
// StateHistory represents the complete history of states during agent execution
type StateHistory struct { type StateHistory struct {
States []StateDescription `json:"states"` States []StateDescription `json:"states"`
} }
// RunAgent sends a request to run an agent with the given goal type DesktopStateDescription struct {
func (c *Client) RunBrowserAgent(req AgentRequest) (*StateHistory, error) { ScreenContent string `json:"screen_content"`
body, err := json.Marshal(req) ScreenshotPath string `json:"screenshot_path"`
if err != nil { }
return nil, fmt.Errorf("failed to marshal request: %w", err)
}
resp, err := c.httpClient.Post( type DesktopStateHistory struct {
fmt.Sprintf("%s/api/browser/run", c.baseURL), States []DesktopStateDescription `json:"states"`
"application/json", }
bytes.NewBuffer(body),
) type SearchResult struct {
Title string `json:"title"`
URL string `json:"url"`
Description string `json:"description"`
}
// ResearchResult is the response payload that Client.RunDeepResearch decodes
// from the deep-research endpoint.
type ResearchResult struct {
	// Topic is read from the "topic" key.
	Topic string `json:"topic"`

	// Summary is read from the "summary" key.
	Summary string `json:"summary"`

	// Sources is read from the "sources" key.
	Sources []SearchResult `json:"sources"`

	// KnowledgeGaps is read from the "knowledge_gaps" key.
	KnowledgeGaps []string `json:"knowledge_gaps"`

	// SearchQueries is read from the "search_queries" key.
	SearchQueries []string `json:"search_queries"`

	// ResearchCycles is read from the "research_cycles" key.
	ResearchCycles int `json:"research_cycles"`

	// CompletionTime is read from the "completion_time" key. encoding/json
	// decodes a time.Duration from a JSON integer, which Go interprets as
	// nanoseconds.
	// NOTE(review): confirm the API emits nanoseconds for this field.
	CompletionTime time.Duration `json:"completion_time"`
}
// RunBrowserAgent posts req as JSON to "/api/browser/run" under the client's
// base URL and returns the response decoded into a StateHistory.
func (c *Client) RunBrowserAgent(req AgentRequest) (*StateHistory, error) {
	endpoint := c.baseURL + "/api/browser/run"
	history, err := post[*StateHistory](c.httpClient, endpoint, req)
	return history, err
}
// RunDesktopAgent posts req as JSON to "/api/desktop/run" under the client's
// base URL and returns the response decoded into a DesktopStateHistory.
func (c *Client) RunDesktopAgent(req DesktopAgentRequest) (*DesktopStateHistory, error) {
	endpoint := c.baseURL + "/api/desktop/run"
	history, err := post[*DesktopStateHistory](c.httpClient, endpoint, req)
	return history, err
}
// RunDeepResearch posts req as JSON to "/api/deep-research/run" under the
// client's base URL and returns the response decoded into a ResearchResult.
func (c *Client) RunDeepResearch(req DeepResearchRequest) (*ResearchResult, error) {
	endpoint := c.baseURL + "/api/deep-research/run"
	research, err := post[*ResearchResult](c.httpClient, endpoint, req)
	return research, err
}
// Readyz issues a GET for the "/readyz" path under the client's base URL and
// returns the result of c.get unchanged.
func (c *Client) Readyz() (string, error) {
	const path = "/readyz"
	return c.get(path)
}
// Healthz issues a GET for the "/healthz" path under the client's base URL
// and returns the result of c.get unchanged.
func (c *Client) Healthz() (string, error) {
	const path = "/healthz"
	return c.get(path)
}
func (c *Client) get(path string) (string, error) {
resp, err := c.httpClient.Get(c.baseURL + path)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err) return "", fmt.Errorf("failed to make request: %w", err)
} }
defer resp.Body.Close() defer resp.Body.Close()
if resp.StatusCode != http.StatusOK { if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) body, _ := io.ReadAll(resp.Body)
return "", fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
} }
var state StateHistory return resp.Status, nil
if err := json.NewDecoder(resp.Body).Decode(&state); err != nil { }
return nil, fmt.Errorf("failed to decode response: %w", err)
} func post[T any](client *http.Client, url string, body interface{}) (T, error) {
var result T
return &state, nil jsonBody, err := json.Marshal(body)
if err != nil {
return result, fmt.Errorf("failed to marshal request body: %w", err)
}
fmt.Println("Sending request", "url", url, "body", string(jsonBody))
resp, err := client.Post(url, "application/json", bytes.NewBuffer(jsonBody))
if err != nil {
return result, fmt.Errorf("failed to make request: %w", err)
}
defer resp.Body.Close()
fmt.Println("Response", "status", resp.StatusCode)
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
return result, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
}
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
return result, fmt.Errorf("failed to decode response: %w", err)
}
return result, nil
} }

View File

@@ -19,6 +19,7 @@ const (
ActionSearch = "search" ActionSearch = "search"
ActionCustom = "custom" ActionCustom = "custom"
ActionBrowserAgentRunner = "browser-agent-runner" ActionBrowserAgentRunner = "browser-agent-runner"
ActionDeepResearchRunner = "deep-research-runner"
ActionGithubIssueLabeler = "github-issue-labeler" ActionGithubIssueLabeler = "github-issue-labeler"
ActionGithubIssueOpener = "github-issue-opener" ActionGithubIssueOpener = "github-issue-opener"
ActionGithubIssueCloser = "github-issue-closer" ActionGithubIssueCloser = "github-issue-closer"
@@ -54,6 +55,7 @@ var AvailableActions = []string{
ActionGithubRepositoryGet, ActionGithubRepositoryGet,
ActionGithubGetAllContent, ActionGithubGetAllContent,
ActionBrowserAgentRunner, ActionBrowserAgentRunner,
ActionDeepResearchRunner,
ActionGithubRepositoryCreateOrUpdate, ActionGithubRepositoryCreateOrUpdate,
ActionGithubIssueReader, ActionGithubIssueReader,
ActionGithubIssueCommenter, ActionGithubIssueCommenter,
@@ -121,6 +123,8 @@ func Action(name, agentName string, config map[string]string, pool *state.AgentP
a = actions.NewGithubIssueSearch(config) a = actions.NewGithubIssueSearch(config)
case ActionBrowserAgentRunner: case ActionBrowserAgentRunner:
a = actions.NewBrowserAgentRunner(config, actionsConfigs["browser-agent-runner-base-url"]) a = actions.NewBrowserAgentRunner(config, actionsConfigs["browser-agent-runner-base-url"])
case ActionDeepResearchRunner:
a = actions.NewDeepResearchRunner(config, actionsConfigs["deep-research-runner-base-url"])
case ActionGithubIssueReader: case ActionGithubIssueReader:
a = actions.NewGithubIssueReader(config) a = actions.NewGithubIssueReader(config)
case ActionGithubPRReader: case ActionGithubPRReader:
@@ -181,6 +185,11 @@ func ActionsConfigMeta() []config.FieldGroup {
Label: "Browser Agent Runner", Label: "Browser Agent Runner",
Fields: actions.BrowserAgentRunnerConfigMeta(), Fields: actions.BrowserAgentRunnerConfigMeta(),
}, },
{
Name: "deep-research-runner",
Label: "Deep Research Runner",
Fields: actions.DeepResearchRunnerConfigMeta(),
},
{ {
Name: "generate_image", Name: "generate_image",
Label: "Generate Image", Label: "Generate Image",

View File

@@ -0,0 +1,129 @@
package actions
import (
"context"
"fmt"
"github.com/mudler/LocalAGI/core/types"
"github.com/mudler/LocalAGI/pkg/config"
api "github.com/mudler/LocalAGI/pkg/localoperator"
"github.com/sashabaranov/go-openai/jsonschema"
)
// MetadataDeepResearchResult is the ActionResult metadata key under which
// DeepResearchRunner.Run stores the research result it received.
const MetadataDeepResearchResult = "deep_research_result"
// DeepResearchRunner is the action that runs a deep research request through
// a LocalOperator API client.
type DeepResearchRunner struct {
	// baseURL is the API base URL chosen by NewDeepResearchRunner.
	baseURL string
	// customActionName, when non-empty, replaces the default action name
	// returned by Definition.
	customActionName string
	// client is the API client used by Run.
	client *api.Client
}
// NewDeepResearchRunner builds a DeepResearchRunner from an action
// configuration map.
//
// The "baseURL" entry selects the LocalOperator API base URL; when it is
// missing or empty, defaultURL is used instead. The "customActionName" entry,
// when set, overrides the action name reported by Definition.
//
// config is only read. The fallback URL is kept in a local variable instead
// of being written back into the map, so a nil map is accepted (writing to a
// nil map panics) and the caller's configuration is left untouched.
//
// NOTE(review): api.NewClient is called without a timeout argument, so the
// client's default timeout applies — confirm it is long enough for a full
// research run.
func NewDeepResearchRunner(config map[string]string, defaultURL string) *DeepResearchRunner {
	baseURL := config["baseURL"]
	if baseURL == "" {
		baseURL = defaultURL
	}

	return &DeepResearchRunner{
		client:           api.NewClient(baseURL),
		baseURL:          baseURL,
		customActionName: config["customActionName"],
	}
}
// Run executes a deep research request and renders the outcome as text.
//
// params is unmarshalled directly into an api.DeepResearchRequest and sent
// with the runner's client. On success the returned ActionResult carries a
// human-readable report in Result and the *api.ResearchResult itself under
// the MetadataDeepResearchResult metadata key.
//
// An error is returned when params cannot be unmarshalled, when the API call
// fails, or when the API call reports success but yields a nil result.
//
// ctx is currently unused: Client.RunDeepResearch does not accept a context,
// so cancellation is not propagated to the underlying HTTP request.
func (d *DeepResearchRunner) Run(ctx context.Context, params types.ActionParams) (types.ActionResult, error) {
	var req api.DeepResearchRequest
	if err := params.Unmarshal(&req); err != nil {
		return types.ActionResult{}, fmt.Errorf("failed to unmarshal params: %w", err)
	}

	researchResult, err := d.client.RunDeepResearch(req)
	if err != nil {
		return types.ActionResult{}, fmt.Errorf("failed to run deep research: %w", err)
	}
	// A response body of JSON null decodes into a nil pointer without an
	// error, so guard before dereferencing the result below.
	if researchResult == nil {
		return types.ActionResult{}, fmt.Errorf("failed to run deep research: empty result")
	}

	// Build the report in a single growing buffer rather than re-allocating
	// a new string for every appended line.
	report := []byte("Deep research completed successfully.\nDeep research result\n")
	report = fmt.Appendf(report, "Topic: %s\n", researchResult.Topic)
	report = fmt.Appendf(report, "Summary: %s\n", researchResult.Summary)
	report = fmt.Appendf(report, "Research Cycles: %d\n", researchResult.ResearchCycles)
	report = fmt.Appendf(report, "Completion Time: %s\n\n", researchResult.CompletionTime)

	if len(researchResult.Sources) > 0 {
		report = append(report, "Sources:\n"...)
		for _, source := range researchResult.Sources {
			report = fmt.Appendf(report, "- %s (%s)\n %s\n", source.Title, source.URL, source.Description)
		}
	}

	return types.ActionResult{
		Result:   string(report),
		Metadata: map[string]interface{}{MetadataDeepResearchResult: researchResult},
	}, nil
}
// Definition describes this action to the caller: its name, a description,
// and the JSON schema of its parameters.
//
// The name is "run_deep_research" unless a custom action name was configured.
// "topic" is the only required parameter. The three optional limits are
// declared as integers because Run decodes them into Go int fields; a
// fractional value accepted under a plain "number" schema would fail to
// unmarshal there.
func (d *DeepResearchRunner) Definition() types.ActionDefinition {
	actionName := "run_deep_research"
	if d.customActionName != "" {
		actionName = d.customActionName
	}

	return types.ActionDefinition{
		Name:        types.ActionDefinitionName(actionName),
		Description: "Run a deep research on a specific topic, gathering information from multiple sources and providing a comprehensive summary",
		Properties: map[string]jsonschema.Definition{
			"topic": {
				Type:        jsonschema.String,
				Description: "The topic to research",
			},
			"max_cycles": {
				Type:        jsonschema.Integer,
				Description: "Maximum number of research cycles to perform (optional)",
			},
			"max_no_action_attempts": {
				Type:        jsonschema.Integer,
				Description: "Maximum number of attempts without taking an action (optional)",
			},
			"max_results": {
				Type:        jsonschema.Integer,
				Description: "Maximum number of results to collect (optional)",
			},
		},
		Required: []string{"topic"},
	}
}
// Plannable always reports true for the deep research action.
func (d *DeepResearchRunner) Plannable() bool { return true }
// DeepResearchRunnerConfigMeta lists the configuration fields of the Deep
// Research Runner action: an optional "baseURL" and a "customActionName",
// both plain text fields.
func DeepResearchRunnerConfigMeta() []config.Field {
	baseURLField := config.Field{
		Name:     "baseURL",
		Label:    "Base URL",
		Type:     config.FieldTypeText,
		Required: false,
		HelpText: "Base URL of the LocalOperator API",
	}
	actionNameField := config.Field{
		Name:     "customActionName",
		Label:    "Custom Action Name",
		Type:     config.FieldTypeText,
		HelpText: "Custom name for this action",
	}

	return []config.Field{baseURLField, actionNameField}
}