feat(agent): add MCP integration (#50)

* feat(agent): add MCP integration Signed-off-by: mudler <mudler@localai.io> * Update core/agent/mcp.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Hook MCP Server configuration to creation and setting mask * Allow to specify a bearer token * Small fixups --------- Signed-off-by: mudler <mudler@localai.io> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-03-15 23:25:03 +01:00
parent dc2570c90b
commit 33b5b8c8f4
10 changed files with 422 additions and 14 deletions
--- a/core/agent/actions.go
+++ b/core/agent/actions.go
@@ -174,7 +174,7 @@ func (m Messages) IsLastMessageFromRole(role string) bool {

 func (a *Agent) generateParameters(ctx context.Context, pickTemplate string, act Action, c []openai.ChatCompletionMessage, reasoning string) (*decisionResult, error) {

-	stateHUD, err := renderTemplate(pickTemplate, a.prepareHUD(), a.systemInternalActions(), reasoning)
+	stateHUD, err := renderTemplate(pickTemplate, a.prepareHUD(), a.availableActions(), reasoning)
 	if err != nil {
 		return nil, err
 	}
@@ -203,7 +203,7 @@ func (a *Agent) generateParameters(ctx context.Context, pickTemplate string, act

 	return a.decision(ctx,
 		cc,
-		a.systemInternalActions().ToTools(),
+		a.availableActions().ToTools(),
 		openai.ToolChoice{
 			Type:     openai.ToolTypeFunction,
 			Function: openai.ToolFunction{Name: act.Definition().Name.String()},
@@ -211,9 +211,9 @@ func (a *Agent) generateParameters(ctx context.Context, pickTemplate string, act
 	)
 }

-func (a *Agent) systemInternalActions() Actions {
+func (a *Agent) availableActions() Actions {
 	//	defaultActions := append(a.options.userActions, action.NewReply())
-	defaultActions := a.options.userActions
+	defaultActions := append(a.mcpActions, a.options.userActions...)

 	if a.options.initiateConversations && a.selfEvaluationInProgress { // && self-evaluation..
 		acts := append(defaultActions, action.NewConversation())
@@ -264,7 +264,7 @@ func (a *Agent) pickAction(ctx context.Context, templ string, messages []openai.
 		// and then use the reply to get the action
 		thought, err := a.decision(ctx,
 			messages,
-			a.systemInternalActions().ToTools(),
+			a.availableActions().ToTools(),
 			nil)
 		if err != nil {
 			return nil, nil, "", err
@@ -274,7 +274,7 @@ func (a *Agent) pickAction(ctx context.Context, templ string, messages []openai.
 		xlog.Debug(fmt.Sprintf("thought message: %v", thought.message))

 		// Find the action
-		chosenAction := a.systemInternalActions().Find(thought.actioName)
+		chosenAction := a.availableActions().Find(thought.actioName)
 		if chosenAction == nil || thought.actioName == "" {
 			xlog.Debug("no answer")

@@ -286,7 +286,7 @@ func (a *Agent) pickAction(ctx context.Context, templ string, messages []openai.
 		return chosenAction, thought.actionParams, thought.message, nil
 	}

-	prompt, err := renderTemplate(templ, a.prepareHUD(), a.systemInternalActions(), "")
+	prompt, err := renderTemplate(templ, a.prepareHUD(), a.availableActions(), "")
 	if err != nil {
 		return nil, nil, "", err
 	}
@@ -325,7 +325,7 @@ func (a *Agent) pickAction(ctx context.Context, templ string, messages []openai.
 	// From the thought, get the action call
 	// Get all the available actions IDs
 	actionsID := []string{}
-	for _, m := range a.systemInternalActions() {
+	for _, m := range a.availableActions() {
 		actionsID = append(actionsID, m.Definition().Name.String())
 	}
 	intentionsTools := action.NewIntention(actionsID...)
@@ -358,7 +358,7 @@ func (a *Agent) pickAction(ctx context.Context, templ string, messages []openai.
 	}

 	// Find the action
-	chosenAction := a.systemInternalActions().Find(actionChoice.Tool)
+	chosenAction := a.availableActions().Find(actionChoice.Tool)
 	if chosenAction == nil {
 		return nil, nil, "", fmt.Errorf("no action found for intent:" + actionChoice.Tool)
 	}
--- a/core/agent/agent.go
+++ b/core/agent/agent.go
@@ -39,6 +39,8 @@ type Agent struct {
 	pause                    bool

 	newConversations chan openai.ChatCompletionMessage
+
+	mcpActions Actions
 }

 type RAGDB interface {
@@ -84,6 +86,10 @@ func New(opts ...Option) (*Agent, error) {
 	// xlog = xlog.New(h)
 	//programLevel.Set(a.options.logLevel)

+	xlog.Info("Populating actions from MCP Servers (if any)")
+	a.initMCPActions()
+	xlog.Info("Done populating actions from MCP Servers")
+
 	xlog.Info(
 		"Agent created",
 		"agent", a.Character.Name,
@@ -214,7 +220,7 @@ func (a *Agent) Memory() RAGDB {
 }

 func (a *Agent) runAction(chosenAction Action, params action.ActionParams) (result action.ActionResult, err error) {
-	for _, act := range a.systemInternalActions() {
+	for _, act := range a.availableActions() {
 		if act.Definition().Name == chosenAction.Definition().Name {
 			res, err := act.Run(a.actionContext, params)
 			if err != nil {
@@ -708,7 +714,7 @@ func (a *Agent) consumeJob(job *Job, role string) {

 	// If we have a hud, display it when answering normally
 	if a.options.enableHUD {
-		prompt, err := renderTemplate(hudTemplate, a.prepareHUD(), a.systemInternalActions(), reasoning)
+		prompt, err := renderTemplate(hudTemplate, a.prepareHUD(), a.availableActions(), reasoning)
 		if err != nil {
 			job.Result.Conversation = a.currentConversation
 			job.Result.Finish(fmt.Errorf("error renderTemplate: %w", err))
--- a/core/agent/mcp.go
+++ b/core/agent/mcp.go
@@ -0,0 +1,160 @@
+package agent
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+
+	mcp "github.com/metoro-io/mcp-golang"
+	"github.com/metoro-io/mcp-golang/transport/http"
+	"github.com/mudler/LocalAgent/core/action"
+	"github.com/mudler/LocalAgent/pkg/xlog"
+	"github.com/sashabaranov/go-openai/jsonschema"
+)
+
+// Compile-time check that *mcpAction is assignable to Action.
+var _ Action = &mcpAction{}
+
+// MCPServer describes a remote MCP server whose tools are turned into agent
+// actions by initMCPActions.
+type MCPServer struct {
+	// URL is handed to the HTTP transport as its base URL.
+	URL   string `json:"url"`
+	// Token, when non-empty, is sent as an "Authorization: Bearer <token>"
+	// header on requests to the server.
+	Token string `json:"token"`
+}
+
+// mcpAction is an Action backed by a single tool listed by an MCP server.
+type mcpAction struct {
+	// mcpClient is the client used to call the tool.
+	mcpClient       *mcp.Client
+	// inputSchema supplies the properties and required list of the definition.
+	inputSchema     ToolInputSchema
+	// toolName is the name passed to CallTool and used as the definition name.
+	toolName        string
+	// toolDescription is used as the definition description.
+	toolDescription string
+}
+
+// Run invokes the wrapped MCP tool with params and returns its text output.
+//
+// Every text item in the response is appended to the result followed by a
+// newline. Image and embedded-resource items are not supported: they are
+// logged and left out of the result. An error from the MCP client is logged
+// and returned unchanged together with an empty ActionResult.
+func (m *mcpAction) Run(ctx context.Context, params action.ActionParams) (action.ActionResult, error) {
+	resp, err := m.mcpClient.CallTool(ctx, m.toolName, params)
+	if err != nil {
+		xlog.Error("Failed to call tool", "error", err.Error())
+		return action.ActionResult{}, err
+	}
+
+	xlog.Debug("MCP response", "response", resp)
+
+	textResult := ""
+	for _, c := range resp.Content {
+		switch c.Type {
+		case mcp.ContentTypeText:
+			// A text item without a payload would panic on the field access
+			// below; log it and move on to the next item instead.
+			if c.TextContent == nil {
+				xlog.Error("Text content without payload", "tool", m.toolName)
+				continue
+			}
+			textResult += c.TextContent.Text + "\n"
+		case mcp.ContentTypeImage:
+			xlog.Error("Image content not supported yet")
+		case mcp.ContentTypeEmbeddedResource:
+			xlog.Error("Embedded resource content not supported yet")
+		}
+	}
+
+	return action.ActionResult{
+		Result: textResult,
+	}, nil
+}
+
+// Definition builds the action definition advertised for this MCP tool.
+//
+// The input-schema properties are converted to jsonschema definitions through
+// a JSON round trip. A failure in either step is logged and the definition is
+// still returned, with whatever properties were decoded (none when
+// marshalling failed).
+func (m *mcpAction) Definition() action.ActionDefinition {
+	props := map[string]jsonschema.Definition{}
+
+	dat, err := json.Marshal(m.inputSchema.Properties)
+	if err != nil {
+		xlog.Error("Failed to marshal input schema", "error", err.Error())
+	} else if err := json.Unmarshal(dat, &props); err != nil {
+		// Previously ignored: surface schemas that do not fit
+		// jsonschema.Definition instead of silently dropping them.
+		xlog.Error("Failed to unmarshal input schema", "error", err.Error(), "tool", m.toolName)
+	}
+
+	return action.ActionDefinition{
+		Name:        action.ActionDefinitionName(m.toolName),
+		Description: m.toolDescription,
+		Required:    m.inputSchema.Required,
+		Properties:  props,
+	}
+}
+
+// ToolInputSchema is the part of a tool's input schema that initMCPActions
+// decodes, via a JSON round trip, from the schema reported by the MCP server.
+type ToolInputSchema struct {
+	// Type is the schema's "type" field.
+	Type       string                 `json:"type"`
+	// Properties holds the raw per-parameter schemas; Definition converts
+	// them to jsonschema definitions.
+	Properties map[string]interface{} `json:"properties,omitempty"`
+	// Required is copied verbatim into the action definition.
+	Required   []string               `json:"required,omitempty"`
+}
+
+// initMCPActions connects to every configured MCP server, lists its tools
+// (following pagination cursors) and stores one mcpAction per tool in
+// a.mcpActions, replacing any previous content.
+//
+// A server that fails to initialize, or whose tool listing fails, does not
+// abort the whole operation: the error is logged and recorded, and the
+// remaining servers are still processed. The returned error joins every
+// recorded failure and is nil when all servers succeeded.
+func (a *Agent) initMCPActions() error {
+	a.mcpActions = nil
+
+	var err error
+	generatedActions := Actions{}
+
+	for _, mcpServer := range a.options.mcpServers {
+		transport := http.NewHTTPClientTransport("/mcp")
+		transport.WithBaseURL(mcpServer.URL)
+		if mcpServer.Token != "" {
+			transport.WithHeader("Authorization", "Bearer "+mcpServer.Token)
+		}
+
+		client := mcp.NewClient(transport)
+
+		// Only the URL is logged: the server struct also carries the bearer
+		// token, which must not end up in the logs.
+		xlog.Debug("Initializing client", "server", mcpServer.URL)
+		response, e := client.Initialize(a.context)
+		if e != nil {
+			xlog.Error("Failed to initialize client", "error", e.Error(), "server", mcpServer.URL)
+			err = errors.Join(err, e)
+			continue
+		}
+
+		xlog.Debug("Client initialized", "server", mcpServer.URL, "instructions", response.Instructions)
+
+		var cursor *string
+		for {
+			tools, e := client.ListTools(a.context, cursor)
+			if e != nil {
+				// Give up on this server only; tools gathered so far (from
+				// this and other servers) are kept.
+				xlog.Error("Failed to list tools", "error", e.Error(), "server", mcpServer.URL)
+				err = errors.Join(err, e)
+				break
+			}
+
+			for _, t := range tools.Tools {
+				desc := ""
+				if t.Description != nil {
+					desc = *t.Description
+				}
+
+				xlog.Debug("Tool", "mcpServer", mcpServer.URL, "name", t.Name, "description", desc)
+
+				// XXX: This is a wild guess, to verify (data types might be incompatible)
+				// The schema is converted by a JSON round trip. On failure the
+				// tool is still registered, with an empty input schema.
+				var inputSchema ToolInputSchema
+				dat, e := json.Marshal(t.InputSchema)
+				if e != nil {
+					xlog.Error("Failed to marshal input schema", "error", e.Error())
+				} else {
+					xlog.Debug("Input schema", "mcpServer", mcpServer.URL, "tool", t.Name, "schema", string(dat))
+					if e := json.Unmarshal(dat, &inputSchema); e != nil {
+						xlog.Error("Failed to unmarshal input schema", "error", e.Error())
+					}
+				}
+
+				// Create a new action with Client + tool
+				generatedActions = append(generatedActions, &mcpAction{
+					mcpClient:       client,
+					toolName:        t.Name,
+					inputSchema:     inputSchema,
+					toolDescription: desc,
+				})
+			}
+
+			if tools.NextCursor == nil {
+				break // No more pages
+			}
+			cursor = tools.NextCursor
+		}
+	}
+
+	a.mcpActions = generatedActions
+
+	return err
+}
--- a/core/agent/options.go
+++ b/core/agent/options.go
@@ -44,6 +44,8 @@ type options struct {
 	resultCallback    func(ActionState)

 	conversationsPath string
+
+	mcpServers []MCPServer
 }

 func (o *options) SeparatedMultimodalModel() bool {
@@ -161,6 +163,13 @@ func WithSystemPrompt(prompt string) Option {
 	}
 }

+// WithMCPServers returns an Option that sets the list of MCP servers on the
+// agent options. The option itself never fails.
+func WithMCPServers(servers ...MCPServer) Option {
+	configure := func(o *options) error {
+		o.mcpServers = servers
+		return nil
+	}
+	return configure
+}
+
 func WithLLMAPIURL(url string) Option {
 	return func(o *options) error {
 		o.LLMAPI.APIURL = url
--- a/core/state/config.go
+++ b/core/state/config.go
@@ -31,6 +31,7 @@ type AgentConfig struct {
 	Connector    []ConnectorConfig    `json:"connectors" form:"connectors" `
 	Actions      []ActionsConfig      `json:"actions" form:"actions"`
 	PromptBlocks []PromptBlocksConfig `json:"promptblocks" form:"promptblocks"`
+	MCPServers   []agent.MCPServer    `json:"mcp_servers" form:"mcp_servers"`

 	// This is what needs to be part of ActionsConfig
 	Model                 string `json:"model" form:"model"`
--- a/core/state/pool.go
+++ b/core/state/pool.go
@@ -212,6 +212,7 @@ func (a *AgentPool) startAgentWithConfig(name string, config *AgentConfig) error
 		WithModel(model),
 		WithLLMAPIURL(a.apiURL),
 		WithContext(ctx),
+		WithMCPServers(config.MCPServers...),
 		WithPeriodicRuns(config.PeriodicRuns),
 		WithPermanentGoal(config.PermanentGoal),
 		WithPrompts(promptBlocks...),