Compare commits

..

4 Commits

Author SHA1 Message Date
Ettore Di Giacinto
69f047ed62 fix: simplify tests to run faster
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-22 09:40:59 +02:00
Ettore Di Giacinto
ef90ecd812 chore: default to gemma-3-12b-it-qat
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-19 22:10:37 +02:00
Ettore Di Giacinto
50e56fe22f feat(browseragent): add browser agent runner action (#55)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-04-18 22:42:17 +02:00
Richard Palethorpe
b5a12a1da6 feat(ui): Structured observability/status view (#40)
* refactor(ui): Make message status SSE name more specific

Signed-off-by: Richard Palethorpe <io@richiejp.com>

* feat(ui): Add structured observability events

Signed-off-by: Richard Palethorpe <io@richiejp.com>

---------

Signed-off-by: Richard Palethorpe <io@richiejp.com>
2025-04-18 17:32:43 +02:00
9 changed files with 236 additions and 29 deletions

View File

@@ -9,7 +9,7 @@ cleanup-tests:
docker compose down
tests: prepare-tests
LOCALAGI_MODEL="arcee-agent" LOCALAI_API_URL="http://localhost:8081" LOCALAGI_API_URL="http://localhost:8080" $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --fail-fast -v -r ./...
LOCALAGI_MODEL="gemma-3-12b-it-qat" LOCALAI_API_URL="http://localhost:8081" LOCALAGI_API_URL="http://localhost:8080" $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --fail-fast -v -r ./...
run-nokb:
$(MAKE) run KBDISABLEINDEX=true

View File

@@ -114,7 +114,7 @@ LocalAGI supports multiple hardware configurations through Docker Compose profil
- Supports text, multimodal, and image generation models
- Run with: `docker compose -f docker-compose.nvidia.yaml up`
- Default models:
- Text: `arcee-agent`
- Text: `gemma-3-12b-it-qat`
- Multimodal: `minicpm-v-2_6`
- Image: `sd-1.5-ggml`
- Environment variables:
@@ -130,7 +130,7 @@ LocalAGI supports multiple hardware configurations through Docker Compose profil
- Supports text, multimodal, and image generation models
- Run with: `docker compose -f docker-compose.intel.yaml up`
- Default models:
- Text: `arcee-agent`
- Text: `gemma-3-12b-it-qat`
- Multimodal: `minicpm-v-2_6`
- Image: `sd-1.5-ggml`
- Environment variables:
@@ -161,7 +161,7 @@ docker compose -f docker-compose.intel.yaml up
```
If no models are specified, it will use the defaults:
- Text model: `arcee-agent`
- Text model: `gemma-3-12b-it-qat`
- Multimodal model: `minicpm-v-2_6`
- Image model: `sd-1.5-ggml`

View File

@@ -226,7 +226,10 @@ var _ = Describe("Agent test", func() {
WithLLMAPIKey(apiKeyURL),
WithTimeout("10m"),
WithActions(
actions.NewSearch(map[string]string{}),
&TestAction{response: map[string]string{
"boston": testActionResult,
"milan": testActionResult2,
}},
),
EnablePlanning,
EnableForceReasoning,
@@ -238,18 +241,21 @@ var _ = Describe("Agent test", func() {
defer agent.Stop()
result := agent.Ask(
types.WithText("Thoroughly plan a trip to San Francisco from Venice, Italy; check flight times, visa requirements and whether electrical items are allowed in cabin luggage."),
types.WithText("Use the plan tool to do two actions in sequence: search for the weather in boston and search for the weather in milan"),
)
Expect(len(result.State)).To(BeNumerically(">", 1))
actionsExecuted := []string{}
actionResults := []string{}
for _, r := range result.State {
xlog.Info(r.Result)
actionsExecuted = append(actionsExecuted, r.Action.Definition().Name.String())
actionResults = append(actionResults, r.ActionResult.Result)
}
Expect(actionsExecuted).To(ContainElement("search_internet"), fmt.Sprint(result))
Expect(actionsExecuted).To(ContainElement("get_weather"), fmt.Sprint(result))
Expect(actionsExecuted).To(ContainElement("plan"), fmt.Sprint(result))
Expect(actionResults).To(ContainElement(testActionResult), fmt.Sprint(result))
Expect(actionResults).To(ContainElement(testActionResult2), fmt.Sprint(result))
})
It("Can initiate conversations", func() {

View File

@@ -7,7 +7,7 @@ services:
# Image list (dockerhub): https://hub.docker.com/r/localai/localai
image: localai/localai:master-ffmpeg-core
command:
- ${MODEL_NAME:-arcee-agent}
- ${MODEL_NAME:-gemma-3-12b-it-qat}
- ${MULTIMODAL_MODEL:-minicpm-v-2_6}
- ${IMAGE_MODEL:-sd-1.5-ggml}
- granite-embedding-107m-multilingual
@@ -59,7 +59,7 @@ services:
- 8080:3000
#image: quay.io/mudler/localagi:master
environment:
- LOCALAGI_MODEL=${MODEL_NAME:-arcee-agent}
- LOCALAGI_MODEL=${MODEL_NAME:-gemma-3-12b-it-qat}
- LOCALAGI_MULTIMODAL_MODEL=${MULTIMODAL_MODEL:-minicpm-v-2_6}
- LOCALAGI_IMAGE_MODEL=${IMAGE_MODEL:-sd-1.5-ggml}
- LOCALAGI_LLM_API_URL=http://localai:8080

View File

@@ -22,6 +22,7 @@ var withLogs = os.Getenv("LOCALAGI_ENABLE_CONVERSATIONS_LOGGING") == "true"
var apiKeysEnv = os.Getenv("LOCALAGI_API_KEYS")
var imageModel = os.Getenv("LOCALAGI_IMAGE_MODEL")
var conversationDuration = os.Getenv("LOCALAGI_CONVERSATION_DURATION")
var localOperatorBaseURL = os.Getenv("LOCALOPERATOR_BASE_URL")
func init() {
if baseModel == "" {
@@ -61,7 +62,9 @@ func main() {
apiKey,
stateDir,
localRAG,
services.Actions,
services.Actions(map[string]string{
"browser-agent-runner-base-url": localOperatorBaseURL,
}),
services.Connectors,
services.DynamicPrompts,
timeout,

View File

@@ -0,0 +1,70 @@
package api
import (
"bytes"
"encoding/json"
"fmt"
"net/http"
)
// Client represents a client for interacting with the LocalOperator API
type Client struct {
baseURL string
httpClient *http.Client
}
// NewClient creates a new API client
func NewClient(baseURL string) *Client {
return &Client{
baseURL: baseURL,
httpClient: &http.Client{},
}
}
// AgentRequest represents the request body for running an agent
type AgentRequest struct {
Goal string `json:"goal"`
MaxAttempts int `json:"max_attempts,omitempty"`
MaxNoActionAttempts int `json:"max_no_action_attempts,omitempty"`
}
// StateDescription represents a single state in the agent's history
type StateDescription struct {
CurrentURL string `json:"current_url"`
PageTitle string `json:"page_title"`
PageContentDescription string `json:"page_content_description"`
}
// StateHistory represents the complete history of states during agent execution
type StateHistory struct {
States []StateDescription `json:"states"`
}
// RunAgent sends a request to run an agent with the given goal
func (c *Client) RunBrowserAgent(req AgentRequest) (*StateHistory, error) {
body, err := json.Marshal(req)
if err != nil {
return nil, fmt.Errorf("failed to marshal request: %w", err)
}
resp, err := c.httpClient.Post(
fmt.Sprintf("%s/api/browser/run", c.baseURL),
"application/json",
bytes.NewBuffer(body),
)
if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
}
var state StateHistory
if err := json.NewDecoder(resp.Body).Decode(&state); err != nil {
return nil, fmt.Errorf("failed to decode response: %w", err)
}
return &state, nil
}

View File

@@ -18,6 +18,7 @@ const (
// Actions
ActionSearch = "search"
ActionCustom = "custom"
ActionBrowserAgentRunner = "browser-agent-runner"
ActionGithubIssueLabeler = "github-issue-labeler"
ActionGithubIssueOpener = "github-issue-opener"
ActionGithubIssueCloser = "github-issue-closer"
@@ -52,6 +53,7 @@ var AvailableActions = []string{
ActionGithubIssueSearcher,
ActionGithubRepositoryGet,
ActionGithubGetAllContent,
ActionBrowserAgentRunner,
ActionGithubRepositoryCreateOrUpdate,
ActionGithubIssueReader,
ActionGithubIssueCommenter,
@@ -71,31 +73,34 @@ var AvailableActions = []string{
ActionShellcommand,
}
func Actions(a *state.AgentConfig) func(ctx context.Context, pool *state.AgentPool) []types.Action {
return func(ctx context.Context, pool *state.AgentPool) []types.Action {
allActions := []types.Action{}
func Actions(actionsConfigs map[string]string) func(a *state.AgentConfig) func(ctx context.Context, pool *state.AgentPool) []types.Action {
return func(a *state.AgentConfig) func(ctx context.Context, pool *state.AgentPool) []types.Action {
return func(ctx context.Context, pool *state.AgentPool) []types.Action {
allActions := []types.Action{}
agentName := a.Name
agentName := a.Name
for _, a := range a.Actions {
var config map[string]string
if err := json.Unmarshal([]byte(a.Config), &config); err != nil {
xlog.Error("Error unmarshalling action config", "error", err)
continue
for _, a := range a.Actions {
var config map[string]string
if err := json.Unmarshal([]byte(a.Config), &config); err != nil {
xlog.Error("Error unmarshalling action config", "error", err)
continue
}
a, err := Action(a.Name, agentName, config, pool, actionsConfigs)
if err != nil {
continue
}
allActions = append(allActions, a)
}
a, err := Action(a.Name, agentName, config, pool)
if err != nil {
continue
}
allActions = append(allActions, a)
return allActions
}
return allActions
}
}
func Action(name, agentName string, config map[string]string, pool *state.AgentPool) (types.Action, error) {
func Action(name, agentName string, config map[string]string, pool *state.AgentPool, actionsConfigs map[string]string) (types.Action, error) {
var a types.Action
var err error
@@ -114,6 +119,8 @@ func Action(name, agentName string, config map[string]string, pool *state.AgentP
a = actions.NewGithubIssueCloser(config)
case ActionGithubIssueSearcher:
a = actions.NewGithubIssueSearch(config)
case ActionBrowserAgentRunner:
a = actions.NewBrowserAgentRunner(config, actionsConfigs["browser-agent-runner-base-url"])
case ActionGithubIssueReader:
a = actions.NewGithubIssueReader(config)
case ActionGithubPRReader:
@@ -169,6 +176,11 @@ func ActionsConfigMeta() []config.FieldGroup {
Label: "Search",
Fields: actions.SearchConfigMeta(),
},
{
Name: "browser-agent-runner",
Label: "Browser Agent Runner",
Fields: actions.BrowserAgentRunnerConfigMeta(),
},
{
Name: "generate_image",
Label: "Generate Image",

View File

@@ -0,0 +1,116 @@
package actions
import (
"context"
"fmt"
"github.com/mudler/LocalAGI/core/types"
"github.com/mudler/LocalAGI/pkg/config"
api "github.com/mudler/LocalAGI/pkg/localoperator"
"github.com/sashabaranov/go-openai/jsonschema"
)
type BrowserAgentRunner struct {
baseURL, customActionName string
client *api.Client
}
func NewBrowserAgentRunner(config map[string]string, defaultURL string) *BrowserAgentRunner {
if config["baseURL"] == "" {
config["baseURL"] = defaultURL
}
client := api.NewClient(config["baseURL"])
return &BrowserAgentRunner{
client: client,
baseURL: config["baseURL"],
customActionName: config["customActionName"],
}
}
func (b *BrowserAgentRunner) Run(ctx context.Context, params types.ActionParams) (types.ActionResult, error) {
result := api.AgentRequest{}
err := params.Unmarshal(&result)
if err != nil {
return types.ActionResult{}, fmt.Errorf("failed to unmarshal params: %w", err)
}
req := api.AgentRequest{
Goal: result.Goal,
MaxAttempts: result.MaxAttempts,
MaxNoActionAttempts: result.MaxNoActionAttempts,
}
stateHistory, err := b.client.RunBrowserAgent(req)
if err != nil {
return types.ActionResult{}, fmt.Errorf("failed to run browser agent: %w", err)
}
// Format the state history into a readable string
var historyStr string
// for i, state := range stateHistory.States {
// historyStr += fmt.Sprintf("State %d:\n", i+1)
// historyStr += fmt.Sprintf(" URL: %s\n", state.CurrentURL)
// historyStr += fmt.Sprintf(" Title: %s\n", state.PageTitle)
// historyStr += fmt.Sprintf(" Description: %s\n\n", state.PageContentDescription)
// }
historyStr += fmt.Sprintf(" URL: %s\n", stateHistory.States[len(stateHistory.States)-1].CurrentURL)
historyStr += fmt.Sprintf(" Title: %s\n", stateHistory.States[len(stateHistory.States)-1].PageTitle)
historyStr += fmt.Sprintf(" Description: %s\n\n", stateHistory.States[len(stateHistory.States)-1].PageContentDescription)
return types.ActionResult{
Result: fmt.Sprintf("Browser agent completed successfully. History:\n%s", historyStr),
}, nil
}
func (b *BrowserAgentRunner) Definition() types.ActionDefinition {
actionName := "run_browser_agent"
if b.customActionName != "" {
actionName = b.customActionName
}
description := "Run a browser agent to achieve a specific goal, for example: 'Go to https://www.google.com and search for 'LocalAI', and tell me what's on the first page'"
return types.ActionDefinition{
Name: types.ActionDefinitionName(actionName),
Description: description,
Properties: map[string]jsonschema.Definition{
"goal": {
Type: jsonschema.String,
Description: "The goal for the browser agent to achieve",
},
"max_attempts": {
Type: jsonschema.Number,
Description: "Maximum number of attempts the agent can make (optional)",
},
"max_no_action_attempts": {
Type: jsonschema.Number,
Description: "Maximum number of attempts without taking an action (optional)",
},
},
Required: []string{"goal"},
}
}
func (a *BrowserAgentRunner) Plannable() bool {
return true
}
// BrowserAgentRunnerConfigMeta returns the metadata for Browser Agent Runner action configuration fields
func BrowserAgentRunnerConfigMeta() []config.Field {
return []config.Field{
{
Name: "baseURL",
Label: "Base URL",
Type: config.FieldTypeText,
Required: false,
HelpText: "Base URL of the LocalOperator API",
},
{
Name: "customActionName",
Label: "Custom Action Name",
Type: config.FieldTypeText,
HelpText: "Custom name for this action",
},
}
}

View File

@@ -444,7 +444,7 @@ func (a *App) ExecuteAction(pool *state.AgentPool) func(c *fiber.Ctx) error {
actionName := c.Params("name")
xlog.Debug("Executing action", "action", actionName, "config", payload.Config, "params", payload.Params)
a, err := services.Action(actionName, "", payload.Config, pool)
a, err := services.Action(actionName, "", payload.Config, pool, map[string]string{})
if err != nil {
xlog.Error("Error creating action", "error", err)
return errorJSONMessage(c, err.Error())