diff --git a/main.go b/main.go
index a2a7eea..d375486 100644
--- a/main.go
+++ b/main.go
@@ -22,6 +22,7 @@ var withLogs = os.Getenv("LOCALAGI_ENABLE_CONVERSATIONS_LOGGING") == "true"
 var apiKeysEnv = os.Getenv("LOCALAGI_API_KEYS")
 var imageModel = os.Getenv("LOCALAGI_IMAGE_MODEL")
 var conversationDuration = os.Getenv("LOCALAGI_CONVERSATION_DURATION")
+var localOperatorBaseURL = os.Getenv("LOCALOPERATOR_BASE_URL")
 
 func init() {
 	if baseModel == "" {
@@ -61,7 +62,9 @@ func main() {
 		apiKey,
 		stateDir,
 		localRAG,
-		services.Actions,
+		services.Actions(map[string]string{
+			"browser-agent-runner-base-url": localOperatorBaseURL,
+		}),
 		services.Connectors,
 		services.DynamicPrompts,
 		timeout,
diff --git a/pkg/localoperator/client.go b/pkg/localoperator/client.go
new file mode 100644
index 0000000..f7df16e
--- /dev/null
+++ b/pkg/localoperator/client.go
@@ -0,0 +1,87 @@
+package api
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"strings"
+)
+
+// Client represents a client for interacting with the LocalOperator API
+type Client struct {
+	baseURL    string
+	httpClient *http.Client
+}
+
+// NewClient creates a new API client for the LocalOperator instance at baseURL.
+// Trailing slashes are trimmed from baseURL so that appending an endpoint path
+// never yields a double slash. The underlying http.Client has no timeout of its
+// own; use RunBrowserAgentWithContext to bound or cancel a request.
+func NewClient(baseURL string) *Client {
+	return &Client{
+		baseURL:    strings.TrimRight(baseURL, "/"),
+		httpClient: &http.Client{},
+	}
+}
+
+// AgentRequest represents the request body for running an agent
+type AgentRequest struct {
+	Goal                string `json:"goal"`
+	MaxAttempts         int    `json:"max_attempts,omitempty"`
+	MaxNoActionAttempts int    `json:"max_no_action_attempts,omitempty"`
+}
+
+// StateDescription represents a single state in the agent's history
+type StateDescription struct {
+	CurrentURL             string `json:"current_url"`
+	PageTitle              string `json:"page_title"`
+	PageContentDescription string `json:"page_content_description"`
+}
+
+// StateHistory represents the complete history of states during agent execution
+type StateHistory struct {
+	States []StateDescription `json:"states"`
+}
+
+// RunBrowserAgent sends a request to run a browser agent with the given goal.
+// It is a convenience wrapper around RunBrowserAgentWithContext that uses
+// context.Background(), so the request cannot be cancelled by the caller.
+func (c *Client) RunBrowserAgent(req AgentRequest) (*StateHistory, error) {
+	return c.RunBrowserAgentWithContext(context.Background(), req)
+}
+
+// RunBrowserAgentWithContext POSTs req as JSON to <baseURL>/api/browser/run and
+// decodes the JSON response body into a StateHistory. The request is aborted
+// when ctx is done. Any response status other than 200 OK is returned as an
+// error.
+func (c *Client) RunBrowserAgentWithContext(ctx context.Context, req AgentRequest) (*StateHistory, error) {
+	body, err := json.Marshal(req)
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal request: %w", err)
+	}
+
+	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+"/api/browser/run", bytes.NewReader(body))
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+	httpReq.Header.Set("Content-Type", "application/json")
+
+	resp, err := c.httpClient.Do(httpReq)
+	if err != nil {
+		return nil, fmt.Errorf("failed to send request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
+	}
+
+	var state StateHistory
+	if err := json.NewDecoder(resp.Body).Decode(&state); err != nil {
+		return nil, fmt.Errorf("failed to decode response: %w", err)
+	}
+
+	return &state, nil
+}
diff --git a/services/actions.go b/services/actions.go
index c64464a..1d77124 100644
--- a/services/actions.go
+++ b/services/actions.go
@@ -18,6 +18,7 @@ const (
 	// Actions
 	ActionSearch = "search"
 	ActionCustom = "custom"
+	ActionBrowserAgentRunner = "browser-agent-runner"
 	ActionGithubIssueLabeler = "github-issue-labeler"
 	ActionGithubIssueOpener = "github-issue-opener"
 	ActionGithubIssueCloser = "github-issue-closer"
@@ -52,6 +53,7 @@ var AvailableActions = []string{
 	ActionGithubIssueSearcher,
 	ActionGithubRepositoryGet,
 	ActionGithubGetAllContent,
+	ActionBrowserAgentRunner,
 	ActionGithubRepositoryCreateOrUpdate,
 	ActionGithubIssueReader,
 	ActionGithubIssueCommenter,
@@ -71,31 +73,34 @@ var AvailableActions = []string{
 	ActionShellcommand,
 }
 
-func Actions(a *state.AgentConfig) func(ctx context.Context, pool *state.AgentPool) []types.Action {
-	return func(ctx context.Context, pool *state.AgentPool) []types.Action {
-		allActions := []types.Action{}
+func Actions(actionsConfigs map[string]string) func(a *state.AgentConfig) func(ctx context.Context, pool *state.AgentPool) []types.Action {
+	return func(a *state.AgentConfig) func(ctx context.Context, pool *state.AgentPool) []types.Action {
+		return func(ctx context.Context, pool *state.AgentPool) []types.Action {
+			allActions := []types.Action{}
 
-		agentName := a.Name
+			agentName := a.Name
 
-		for _, a := range a.Actions {
-			var config map[string]string
-			if err := json.Unmarshal([]byte(a.Config), &config); err != nil {
-				xlog.Error("Error unmarshalling action config", "error", err)
-				continue
+			for _, a := range a.Actions {
+				var config map[string]string
+				if err := json.Unmarshal([]byte(a.Config), &config); err != nil {
+					xlog.Error("Error unmarshalling action config", "error", err)
+					continue
+				}
+
+				a, err := Action(a.Name, agentName, config, pool, actionsConfigs)
+				if err != nil {
+					continue
+				}
+				allActions = append(allActions, a)
 			}
 
-			a, err := Action(a.Name, agentName, config, pool)
-			if err != nil {
-				continue
-			}
-			allActions = append(allActions, a)
+			return allActions
 		}
-
-		return allActions
 	}
+
 }
 
-func Action(name, agentName string, config map[string]string, pool *state.AgentPool) (types.Action, error) {
+func Action(name, agentName string, config map[string]string, pool *state.AgentPool, actionsConfigs map[string]string) (types.Action, error) {
 	var a types.Action
 	var err error
 
@@ -114,6 +119,8 @@ func Action(name, agentName string, config map[string]string, pool *state.AgentP
 		a = actions.NewGithubIssueCloser(config)
 	case ActionGithubIssueSearcher:
 		a = actions.NewGithubIssueSearch(config)
+	case ActionBrowserAgentRunner:
+		a = actions.NewBrowserAgentRunner(config, actionsConfigs["browser-agent-runner-base-url"])
 	case ActionGithubIssueReader:
 		a = actions.NewGithubIssueReader(config)
 	case ActionGithubPRReader:
@@ -169,6 +176,11 @@ func ActionsConfigMeta() []config.FieldGroup {
 			Label: "Search",
 			Fields: actions.SearchConfigMeta(),
 		},
+		{
+			Name: "browser-agent-runner",
+			Label: "Browser Agent Runner",
+			Fields: actions.BrowserAgentRunnerConfigMeta(),
+		},
 		{
 			Name: "generate_image",
 			Label: "Generate Image",
diff --git a/services/actions/browseragentrunner.go b/services/actions/browseragentrunner.go
new file mode 100644
index 0000000..160b5d5
--- /dev/null
+++ b/services/actions/browseragentrunner.go
@@ -0,0 +1,122 @@
+package actions
+
+import (
+	"context"
+	"errors"
+	"fmt"
+
+	"github.com/mudler/LocalAGI/core/types"
+	"github.com/mudler/LocalAGI/pkg/config"
+	api "github.com/mudler/LocalAGI/pkg/localoperator"
+	"github.com/sashabaranov/go-openai/jsonschema"
+)
+
+// BrowserAgentRunner is an action that hands a goal to a LocalOperator browser
+// agent and reports the last page state the agent returned.
+type BrowserAgentRunner struct {
+	baseURL, customActionName string
+	client                    *api.Client
+}
+
+// NewBrowserAgentRunner builds the action from config. config["baseURL"] is the
+// LocalOperator endpoint, falling back to defaultURL when empty, and
+// config["customActionName"] optionally overrides the action name. config is
+// only read, never written, so a nil map is accepted.
+func NewBrowserAgentRunner(config map[string]string, defaultURL string) *BrowserAgentRunner {
+	baseURL := config["baseURL"]
+	if baseURL == "" {
+		baseURL = defaultURL
+	}
+
+	return &BrowserAgentRunner{
+		client:           api.NewClient(baseURL),
+		baseURL:          baseURL,
+		customActionName: config["customActionName"],
+	}
+}
+
+// Run decodes params into an api.AgentRequest, runs the browser agent with ctx
+// and returns the URL, title and content description of the final state.
+func (b *BrowserAgentRunner) Run(ctx context.Context, params types.ActionParams) (types.ActionResult, error) {
+	if b.baseURL == "" {
+		return types.ActionResult{}, errors.New("browser agent base URL is not configured")
+	}
+
+	var req api.AgentRequest
+	if err := params.Unmarshal(&req); err != nil {
+		return types.ActionResult{}, fmt.Errorf("failed to unmarshal params: %w", err)
+	}
+
+	stateHistory, err := b.client.RunBrowserAgentWithContext(ctx, req)
+	if err != nil {
+		return types.ActionResult{}, fmt.Errorf("failed to run browser agent: %w", err)
+	}
+
+	// Guard the index below: an empty history would otherwise panic.
+	if len(stateHistory.States) == 0 {
+		return types.ActionResult{}, errors.New("browser agent returned no states")
+	}
+
+	// Only the final state is reported back.
+	last := stateHistory.States[len(stateHistory.States)-1]
+	historyStr := fmt.Sprintf(" URL: %s\n", last.CurrentURL) +
+		fmt.Sprintf(" Title: %s\n", last.PageTitle) +
+		fmt.Sprintf(" Description: %s\n\n", last.PageContentDescription)
+
+	return types.ActionResult{
+		Result: fmt.Sprintf("Browser agent completed successfully. History:\n%s", historyStr),
+	}, nil
+}
+
+// Definition describes the action and its parameters. The name defaults to
+// "run_browser_agent" unless a custom action name was configured.
+func (b *BrowserAgentRunner) Definition() types.ActionDefinition {
+	actionName := "run_browser_agent"
+	if b.customActionName != "" {
+		actionName = b.customActionName
+	}
+	description := "Run a browser agent to achieve a specific goal, for example: 'Go to https://www.google.com and search for 'LocalAI', and tell me what's on the first page'"
+	return types.ActionDefinition{
+		Name:        types.ActionDefinitionName(actionName),
+		Description: description,
+		Properties: map[string]jsonschema.Definition{
+			"goal": {
+				Type:        jsonschema.String,
+				Description: "The goal for the browser agent to achieve",
+			},
+			"max_attempts": {
+				Type:        jsonschema.Number,
+				Description: "Maximum number of attempts the agent can make (optional)",
+			},
+			"max_no_action_attempts": {
+				Type:        jsonschema.Number,
+				Description: "Maximum number of attempts without taking an action (optional)",
+			},
+		},
+		Required: []string{"goal"},
+	}
+}
+
+// Plannable always returns true.
+func (b *BrowserAgentRunner) Plannable() bool {
+	return true
+}
+
+// BrowserAgentRunnerConfigMeta returns the metadata for Browser Agent Runner action configuration fields
+func BrowserAgentRunnerConfigMeta() []config.Field {
+	return []config.Field{
+		{
+			Name:     "baseURL",
+			Label:    "Base URL",
+			Type:     config.FieldTypeText,
+			Required: false,
+			HelpText: "Base URL of the LocalOperator API",
+		},
+		{
+			Name:     "customActionName",
+			Label:    "Custom Action Name",
+			Type:     config.FieldTypeText,
+			HelpText: "Custom name for this action",
+		},
+	}
+}
diff --git a/webui/app.go b/webui/app.go
index ad9d362..ec95f2a 100644
--- a/webui/app.go
+++ b/webui/app.go
@@ -444,7 +444,7 @@ func (a *App) ExecuteAction(pool *state.AgentPool) func(c *fiber.Ctx) error {
 		actionName := c.Params("name")
 
 		xlog.Debug("Executing action", "action", actionName, "config", payload.Config, "params", payload.Params)
-		a, err := services.Action(actionName, "", payload.Config, pool)
+		a, err := services.Action(actionName, "", payload.Config, pool, map[string]string{})
 		if err != nil {
 			xlog.Error("Error creating action", "error", err)
 			return errorJSONMessage(c, err.Error())