feat(planning): enable agent planning (#68)

* feat(planning): Allow the agent to plan subtasks Signed-off-by: mudler <mudler@localai.io> * feat(planning): enable planning toggle in the webui Signed-off-by: mudler <mudler@localai.io> * feat(planning): take in consideration the overall goal Signed-off-by: mudler <mudler@localai.io> * Update core/action/plan.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Signed-off-by: mudler <mudler@localai.io> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-03-21 12:28:11 +01:00
parent 638eedc2a0
commit 33483ab4b9
37 changed files with 259 additions and 35 deletions
--- a/core/agent/actions.go
+++ b/core/agent/actions.go
@@ -27,6 +27,7 @@ type ActionCurrentState struct {
 type Action interface {
 	Run(ctx context.Context, action action.ActionParams) (action.ActionResult, error)
 	Definition() action.ActionDefinition
+	Plannable() bool // reports whether the action's name is listed when the plan action is built
 }

 type Actions []Action
@@ -211,8 +212,76 @@ func (a *Agent) generateParameters(ctx context.Context, pickTemplate string, act
 	)
 }

+// handlePlanning runs every subtask of a plan when chosenAction is the plan action,
+// recording each result on the job and conversation; otherwise it returns nil.
+func (a *Agent) handlePlanning(ctx context.Context, job *Job, chosenAction Action, actionParams action.ActionParams, reasoning string, pickTemplate string) error {
+	if !chosenAction.Definition().Name.Is(action.PlanActionName) {
+		return nil
+	}
+
+	planResult := action.PlanResult{}
+	if err := actionParams.Unmarshal(&planResult); err != nil {
+		return fmt.Errorf("error unmarshalling plan result: %w", err)
+	}
+
+	xlog.Info("[Planning] starts", "agent", a.Character.Name, "goal", planResult.Goal)
+	for _, s := range planResult.Subtasks {
+		xlog.Info("[Planning] subtask", "agent", a.Character.Name, "action", s.Action, "reasoning", s.Reasoning)
+	}
+
+	if len(planResult.Subtasks) == 0 {
+		return fmt.Errorf("no subtasks")
+	}
+
+	// Execute all subtasks in sequence; the first failure aborts the rest of the plan.
+	for _, subtask := range planResult.Subtasks {
+		xlog.Info("[subtask] Generating parameters", "agent", a.Character.Name, "action", subtask.Action, "reasoning", reasoning)
+
+		subtaskAction := a.availableActions().Find(subtask.Action)
+		if subtaskAction == nil {
+			// NOTE(review): assumes Find yields nil for an unknown name; fail here rather than panic below.
+			return fmt.Errorf("planned action %q is not available", subtask.Action)
+		}
+
+		params, err := a.generateParameters(ctx, pickTemplate, subtaskAction, a.currentConversation, fmt.Sprintf("%s, overall goal is: %s", subtask.Reasoning, planResult.Goal))
+		if err != nil {
+			return fmt.Errorf("error generating action's parameters: %w", err)
+		}
+		subtaskParams := params.actionParams
+
+		result, err := a.runAction(subtaskAction, subtaskParams)
+		if err != nil {
+			return fmt.Errorf("error running action: %w", err)
+		}
+
+		stateResult := ActionState{ActionCurrentState{subtaskAction, subtaskParams, subtask.Reasoning}, result}
+		job.Result.SetResult(stateResult)
+		job.CallbackWithResult(stateResult)
+		xlog.Debug("[subtask] Action executed", "agent", a.Character.Name, "action", subtaskAction.Definition().Name, "result", result)
+		a.addFunctionResultToConversation(subtaskAction, subtaskParams, result)
+	}
+
+	return nil
+}
+
 func (a *Agent) availableActions() Actions {
 	//	defaultActions := append(a.options.userActions, action.NewReply())
+
+	// addPlanAction appends the plan action (listing every plannable action name) when planning is enabled.
+	addPlanAction := func(actions Actions) Actions {
+		if !a.options.canPlan {
+			return actions
+		}
+		names := make([]string, 0, len(actions))
+		for _, candidate := range actions {
+			if !candidate.Plannable() {
+				continue
+			}
+			names = append(names, candidate.Definition().Name.String())
+		}
+		return append(actions, action.NewPlan(names))
+	}
+
 	defaultActions := append(a.mcpActions, a.options.userActions...)

 	if a.options.initiateConversations && a.selfEvaluationInProgress { // && self-evaluation..
@@ -224,7 +293,7 @@ func (a *Agent) availableActions() Actions {
 		//		acts = append(acts, action.NewStop())
 		//	}

-		return acts
+		return addPlanAction(acts)
 	}

 	if a.options.canStopItself {
@@ -232,14 +301,14 @@ func (a *Agent) availableActions() Actions {
 		if a.options.enableHUD {
 			acts = append(acts, action.NewState())
 		}
-		return acts
+		return addPlanAction(acts)
 	}

 	if a.options.enableHUD {
-		return append(defaultActions, action.NewState())
+		return addPlanAction(append(defaultActions, action.NewState()))
 	}

-	return defaultActions
+	return addPlanAction(defaultActions)
 }

 func (a *Agent) prepareHUD() (promptHUD *PromptHUD) {
--- a/core/agent/agent.go
+++ b/core/agent/agent.go
@@ -4,7 +4,6 @@ import (
 	"context"
 	"fmt"
 	"os"
-	"strings"
 	"sync"
 	"time"

@@ -571,6 +570,11 @@ func (a *Agent) consumeJob(job *Job, role string) {
 		return
 	}

+	if err := a.handlePlanning(ctx, job, chosenAction, actionParams, reasoning, pickTemplate); err != nil {
+		job.Result.Finish(fmt.Errorf("error running action: %w", err))
+		return
+	}
+
 	if !job.Callback(ActionCurrentState{chosenAction, actionParams, reasoning}) {
 		job.Result.SetResult(ActionState{ActionCurrentState{chosenAction, actionParams, reasoning}, action.ActionResult{Result: "stopped by callback"}})
 		job.Result.Conversation = a.currentConversation
@@ -620,27 +624,7 @@ func (a *Agent) consumeJob(job *Job, role string) {
 		job.CallbackWithResult(stateResult)
 		xlog.Debug("Action executed", "agent", a.Character.Name, "action", chosenAction.Definition().Name, "result", result)

-		// calling the function
-		a.currentConversation = append(a.currentConversation, openai.ChatCompletionMessage{
-			Role: "assistant",
-			ToolCalls: []openai.ToolCall{
-				{
-					Type: openai.ToolTypeFunction,
-					Function: openai.FunctionCall{
-						Name:      chosenAction.Definition().Name.String(),
-						Arguments: actionParams.String(),
-					},
-				},
-			},
-		})
-
-		// result of calling the function
-		a.currentConversation = append(a.currentConversation, openai.ChatCompletionMessage{
-			Role:       openai.ChatMessageRoleTool,
-			Content:    result.Result,
-			Name:       chosenAction.Definition().Name.String(),
-			ToolCallID: chosenAction.Definition().Name.String(),
-		})
+		a.addFunctionResultToConversation(chosenAction, actionParams, result)

 		//a.currentConversation = append(a.currentConversation, messages...)
 		//a.currentConversation = messages
@@ -776,8 +760,7 @@ func (a *Agent) consumeJob(job *Job, role string) {
 	}

 	// If we didn't got any message, we can use the response from the action
-	if chosenAction.Definition().Name.Is(action.ReplyActionName) && msg.Content == "" ||
-		strings.Contains(msg.Content, "<tool_call>") {
+	if chosenAction.Definition().Name.Is(action.ReplyActionName) && msg.Content == "" {
 		xlog.Info("No output returned from conversation, using the action response as a reply " + replyResponse.Message)

 		msg = openai.ChatCompletionMessage{
@@ -794,6 +777,30 @@ func (a *Agent) consumeJob(job *Job, role string) {
 	job.Result.Finish(nil)
 }

+// addFunctionResultToConversation appends the assistant tool call for chosenAction
+// and the matching tool result message to the current conversation.
+func (a *Agent) addFunctionResultToConversation(chosenAction Action, actionParams action.ActionParams, result action.ActionResult) {
+	// The action name doubles as the call ID so the result can be paired with its call.
+	name := chosenAction.Definition().Name.String()
+
+	a.currentConversation = append(a.currentConversation,
+		openai.ChatCompletionMessage{ // calling the function
+			Role: "assistant",
+			ToolCalls: []openai.ToolCall{{
+				ID:       name,
+				Type:     openai.ToolTypeFunction,
+				Function: openai.FunctionCall{Name: name, Arguments: actionParams.String()},
+			}},
+		},
+		openai.ChatCompletionMessage{ // result of calling the function
+			Role:       openai.ChatMessageRoleTool,
+			Content:    result.Result,
+			Name:       name,
+			ToolCallID: name,
+		},
+	)
+}
+
 // This is running in the background.
 func (a *Agent) periodicallyRun(timer *time.Timer) {
 	// Remember always to reset the timer - if we don't the agent will stop..
--- a/core/agent/agent_test.go
+++ b/core/agent/agent_test.go
@@ -36,6 +36,10 @@ type TestAction struct {
 	response map[string]string
 }

+// Plannable always reports true, so the test action's name is
+// listed when the plan action is built.
+func (a *TestAction) Plannable() bool { return true }
+
 func (a *TestAction) Run(c context.Context, p action.ActionParams) (action.ActionResult, error) {
 	for k, r := range a.response {
 		if strings.Contains(strings.ToLower(p.String()), strings.ToLower(k)) {
--- a/core/agent/mcp.go
+++ b/core/agent/mcp.go
@@ -26,6 +26,10 @@ type mcpAction struct {
 	toolDescription string
 }

+// Plannable always reports true, so MCP-backed actions are listed
+// when the plan action is built.
+func (m *mcpAction) Plannable() bool { return true }
+
 func (m *mcpAction) Run(ctx context.Context, params action.ActionParams) (action.ActionResult, error) {
 	resp, err := m.mcpClient.CallTool(ctx, m.toolName, params)
 	if err != nil {
--- a/core/agent/options.go
+++ b/core/agent/options.go
@@ -26,6 +26,7 @@ type options struct {
 	canStopItself         bool
 	initiateConversations bool
 	forceReasoning        bool
+	canPlan               bool
 	characterfile         string
 	statefile             string
 	context               context.Context
@@ -127,6 +128,11 @@ var EnableInitiateConversations = func(o *options) error {
 	return nil
 }

+// EnablePlanning is an option that sets canPlan, so the plan action is added to the agent's available actions.
+var EnablePlanning = func(o *options) error {
+	o.canPlan = true
+	return nil
+}
+
 // EnableStandaloneJob is an option to enable the agent
 // to run jobs in the background automatically
 var EnableStandaloneJob = func(o *options) error {