feat: add deep research action (#91)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2025-04-29 08:46:55 +02:00
committed by GitHub
parent f3884c0244
commit 29f7644577
4 changed files with 244 additions and 28 deletions

View File

@@ -66,6 +66,7 @@ func main() {
localRAG,
services.Actions(map[string]string{
"browser-agent-runner-base-url": localOperatorBaseURL,
"deep-research-runner-base-url": localOperatorBaseURL,
}),
services.Connectors,
services.DynamicPrompts,

View File

@@ -4,69 +4,146 @@ import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
"time"
)
// Client represents a client for interacting with the LocalOperator API
type Client struct {
baseURL string
httpClient *http.Client
}
// NewClient creates a new API client
func NewClient(baseURL string) *Client {
func NewClient(baseURL string, timeout ...time.Duration) *Client {
defaultTimeout := 30 * time.Second
if len(timeout) > 0 {
defaultTimeout = timeout[0]
}
return &Client{
baseURL: baseURL,
httpClient: &http.Client{},
httpClient: &http.Client{
Timeout: defaultTimeout,
},
}
}
// AgentRequest represents the request body for running an agent
type AgentRequest struct {
Goal string `json:"goal"`
MaxAttempts int `json:"max_attempts,omitempty"`
MaxNoActionAttempts int `json:"max_no_action_attempts,omitempty"`
}
// StateDescription represents a single state in the agent's history
type DesktopAgentRequest struct {
AgentRequest
DesktopURL string `json:"desktop_url"`
}
type DeepResearchRequest struct {
Topic string `json:"topic"`
MaxCycles int `json:"max_cycles,omitempty"`
MaxNoActionAttempts int `json:"max_no_action_attempts,omitempty"`
MaxResults int `json:"max_results,omitempty"`
}
// Response types
type StateDescription struct {
CurrentURL string `json:"current_url"`
PageTitle string `json:"page_title"`
PageContentDescription string `json:"page_content_description"`
Screenshot string `json:"screenshot"`
ScreenshotMimeType string `json:"screenshot_mime_type"` // MIME type of the screenshot (e.g., "image/png")
ScreenshotMimeType string `json:"screenshot_mime_type"`
}
// StateHistory represents the complete history of states during agent execution
type StateHistory struct {
States []StateDescription `json:"states"`
}
// RunAgent sends a request to run an agent with the given goal
func (c *Client) RunBrowserAgent(req AgentRequest) (*StateHistory, error) {
body, err := json.Marshal(req)
if err != nil {
return nil, fmt.Errorf("failed to marshal request: %w", err)
}
type DesktopStateDescription struct {
ScreenContent string `json:"screen_content"`
ScreenshotPath string `json:"screenshot_path"`
}
resp, err := c.httpClient.Post(
fmt.Sprintf("%s/api/browser/run", c.baseURL),
"application/json",
bytes.NewBuffer(body),
)
type DesktopStateHistory struct {
States []DesktopStateDescription `json:"states"`
}
type SearchResult struct {
Title string `json:"title"`
URL string `json:"url"`
Description string `json:"description"`
}
type ResearchResult struct {
Topic string `json:"topic"`
Summary string `json:"summary"`
Sources []SearchResult `json:"sources"`
KnowledgeGaps []string `json:"knowledge_gaps"`
SearchQueries []string `json:"search_queries"`
ResearchCycles int `json:"research_cycles"`
CompletionTime time.Duration `json:"completion_time"`
}
func (c *Client) RunBrowserAgent(req AgentRequest) (*StateHistory, error) {
return post[*StateHistory](c.httpClient, c.baseURL+"/api/browser/run", req)
}
func (c *Client) RunDesktopAgent(req DesktopAgentRequest) (*DesktopStateHistory, error) {
return post[*DesktopStateHistory](c.httpClient, c.baseURL+"/api/desktop/run", req)
}
func (c *Client) RunDeepResearch(req DeepResearchRequest) (*ResearchResult, error) {
return post[*ResearchResult](c.httpClient, c.baseURL+"/api/deep-research/run", req)
}
func (c *Client) Readyz() (string, error) {
return c.get("/readyz")
}
func (c *Client) Healthz() (string, error) {
return c.get("/healthz")
}
func (c *Client) get(path string) (string, error) {
resp, err := c.httpClient.Get(c.baseURL + path)
if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err)
return "", fmt.Errorf("failed to make request: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
body, _ := io.ReadAll(resp.Body)
return "", fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
}
var state StateHistory
if err := json.NewDecoder(resp.Body).Decode(&state); err != nil {
return nil, fmt.Errorf("failed to decode response: %w", err)
}
return &state, nil
return resp.Status, nil
}
func post[T any](client *http.Client, url string, body interface{}) (T, error) {
var result T
jsonBody, err := json.Marshal(body)
if err != nil {
return result, fmt.Errorf("failed to marshal request body: %w", err)
}
fmt.Println("Sending request", "url", url, "body", string(jsonBody))
resp, err := client.Post(url, "application/json", bytes.NewBuffer(jsonBody))
if err != nil {
return result, fmt.Errorf("failed to make request: %w", err)
}
defer resp.Body.Close()
fmt.Println("Response", "status", resp.StatusCode)
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
return result, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
}
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
return result, fmt.Errorf("failed to decode response: %w", err)
}
return result, nil
}

View File

@@ -19,6 +19,7 @@ const (
ActionSearch = "search"
ActionCustom = "custom"
ActionBrowserAgentRunner = "browser-agent-runner"
ActionDeepResearchRunner = "deep-research-runner"
ActionGithubIssueLabeler = "github-issue-labeler"
ActionGithubIssueOpener = "github-issue-opener"
ActionGithubIssueCloser = "github-issue-closer"
@@ -54,6 +55,7 @@ var AvailableActions = []string{
ActionGithubRepositoryGet,
ActionGithubGetAllContent,
ActionBrowserAgentRunner,
ActionDeepResearchRunner,
ActionGithubRepositoryCreateOrUpdate,
ActionGithubIssueReader,
ActionGithubIssueCommenter,
@@ -121,6 +123,8 @@ func Action(name, agentName string, config map[string]string, pool *state.AgentP
a = actions.NewGithubIssueSearch(config)
case ActionBrowserAgentRunner:
a = actions.NewBrowserAgentRunner(config, actionsConfigs["browser-agent-runner-base-url"])
case ActionDeepResearchRunner:
a = actions.NewDeepResearchRunner(config, actionsConfigs["deep-research-runner-base-url"])
case ActionGithubIssueReader:
a = actions.NewGithubIssueReader(config)
case ActionGithubPRReader:
@@ -181,6 +185,11 @@ func ActionsConfigMeta() []config.FieldGroup {
Label: "Browser Agent Runner",
Fields: actions.BrowserAgentRunnerConfigMeta(),
},
{
Name: "deep-research-runner",
Label: "Deep Research Runner",
Fields: actions.DeepResearchRunnerConfigMeta(),
},
{
Name: "generate_image",
Label: "Generate Image",

View File

@@ -0,0 +1,129 @@
package actions
import (
"context"
"fmt"
"github.com/mudler/LocalAGI/core/types"
"github.com/mudler/LocalAGI/pkg/config"
api "github.com/mudler/LocalAGI/pkg/localoperator"
"github.com/sashabaranov/go-openai/jsonschema"
)
const (
MetadataDeepResearchResult = "deep_research_result"
)
type DeepResearchRunner struct {
baseURL, customActionName string
client *api.Client
}
func NewDeepResearchRunner(config map[string]string, defaultURL string) *DeepResearchRunner {
if config["baseURL"] == "" {
config["baseURL"] = defaultURL
}
client := api.NewClient(config["baseURL"])
return &DeepResearchRunner{
client: client,
baseURL: config["baseURL"],
customActionName: config["customActionName"],
}
}
func (d *DeepResearchRunner) Run(ctx context.Context, params types.ActionParams) (types.ActionResult, error) {
result := api.DeepResearchRequest{}
err := params.Unmarshal(&result)
if err != nil {
return types.ActionResult{}, fmt.Errorf("failed to unmarshal params: %w", err)
}
req := api.DeepResearchRequest{
Topic: result.Topic,
MaxCycles: result.MaxCycles,
MaxNoActionAttempts: result.MaxNoActionAttempts,
MaxResults: result.MaxResults,
}
researchResult, err := d.client.RunDeepResearch(req)
if err != nil {
return types.ActionResult{}, fmt.Errorf("failed to run deep research: %w", err)
}
// Format the research result into a readable string
var resultStr string
resultStr += "Deep research result\n"
resultStr += fmt.Sprintf("Topic: %s\n", researchResult.Topic)
resultStr += fmt.Sprintf("Summary: %s\n", researchResult.Summary)
resultStr += fmt.Sprintf("Research Cycles: %d\n", researchResult.ResearchCycles)
resultStr += fmt.Sprintf("Completion Time: %s\n\n", researchResult.CompletionTime)
if len(researchResult.Sources) > 0 {
resultStr += "Sources:\n"
for _, source := range researchResult.Sources {
resultStr += fmt.Sprintf("- %s (%s)\n %s\n", source.Title, source.URL, source.Description)
}
}
return types.ActionResult{
Result: fmt.Sprintf("Deep research completed successfully.\n%s", resultStr),
Metadata: map[string]interface{}{MetadataDeepResearchResult: researchResult},
}, nil
}
func (d *DeepResearchRunner) Definition() types.ActionDefinition {
actionName := "run_deep_research"
if d.customActionName != "" {
actionName = d.customActionName
}
description := "Run a deep research on a specific topic, gathering information from multiple sources and providing a comprehensive summary"
return types.ActionDefinition{
Name: types.ActionDefinitionName(actionName),
Description: description,
Properties: map[string]jsonschema.Definition{
"topic": {
Type: jsonschema.String,
Description: "The topic to research",
},
"max_cycles": {
Type: jsonschema.Number,
Description: "Maximum number of research cycles to perform (optional)",
},
"max_no_action_attempts": {
Type: jsonschema.Number,
Description: "Maximum number of attempts without taking an action (optional)",
},
"max_results": {
Type: jsonschema.Number,
Description: "Maximum number of results to collect (optional)",
},
},
Required: []string{"topic"},
}
}
func (d *DeepResearchRunner) Plannable() bool {
return true
}
// DeepResearchRunnerConfigMeta returns the metadata for Deep Research Runner action configuration fields
func DeepResearchRunnerConfigMeta() []config.Field {
return []config.Field{
{
Name: "baseURL",
Label: "Base URL",
Type: config.FieldTypeText,
Required: false,
HelpText: "Base URL of the LocalOperator API",
},
{
Name: "customActionName",
Label: "Custom Action Name",
Type: config.FieldTypeText,
HelpText: "Custom name for this action",
},
}
}