feat: improve action picking

Signed-off-by: mudler <mudler@localai.io>
2025-04-11 20:43:48 +02:00
parent 9dad2b0ba4
commit e4271b4d2f
6 changed files with 200 additions and 141 deletions
--- a/core/action/goal.go
+++ b/core/action/goal.go
@@ -0,0 +1,49 @@
+package action
+
+import (
+	"context"
+
+	"github.com/mudler/LocalAGI/core/types"
+	"github.com/sashabaranov/go-openai/jsonschema"
+)
+
+// NewGoal creates a new goal action.
+// The goal action is special: its Run is a no-op, and its definition
+// only captures a goal and whether that goal has been achieved.
+func NewGoal(s ...string) *GoalAction {
+	return &GoalAction{tools: s}
+}
+
+type GoalAction struct {
+	tools []string
+}
+type GoalResponse struct {
+	Goal     string `json:"goal"`
+	Achieved bool   `json:"achieved"`
+}
+
+func (a *GoalAction) Run(context.Context, types.ActionParams) (types.ActionResult, error) {
+	return types.ActionResult{}, nil
+}
+
+func (a *GoalAction) Plannable() bool {
+	return false
+}
+
+func (a *GoalAction) Definition() types.ActionDefinition {
+	return types.ActionDefinition{
+		Name:        "goal",
+		Description: "Check if the goal is achieved",
+		Properties: map[string]jsonschema.Definition{
+			"goal": {
+				Type:        jsonschema.String,
+				Description: "The goal to check if it is achieved.",
+			},
+			"achieved": {
+				Type:        jsonschema.Boolean,
+				Description: "Whether the goal is achieved",
+			},
+		},
+		Required: []string{"goal", "achieved"},
+	}
+}
--- a/core/agent/actions.go
+++ b/core/agent/actions.go
@@ -5,7 +5,6 @@ import (
 	"encoding/json"
 	"fmt"
 	"os"
-	"strings"

 	"github.com/mudler/LocalAGI/core/action"
 	"github.com/mudler/LocalAGI/core/types"
@@ -80,6 +79,15 @@ func (m Messages) ToOpenAI() []openai.ChatCompletionMessage {
 	return []openai.ChatCompletionMessage(m)
 }

+func (m Messages) RemoveIf(f func(msg openai.ChatCompletionMessage) bool) Messages {
+	for i := len(m) - 1; i >= 0; i-- {
+		if f(m[i]) {
+			m = append(m[:i], m[i+1:]...)
+		}
+	}
+	return m
+}
+
 func (m Messages) String() string {
 	s := ""
 	for _, cc := range m {
@@ -412,27 +420,26 @@ func (a *Agent) pickAction(ctx context.Context, templ string, messages []openai.
 		actionsID = append(actionsID, m.Definition().Name.String())
 	}

-	thoughtPromptStringBuilder := strings.Builder{}
-	thoughtPromptStringBuilder.WriteString("You have to pick an action based on the conversation and the prompt. Describe the full reasoning process for your choice. Here is a list of actions: ")
-	for _, m := range a.availableActions() {
-		thoughtPromptStringBuilder.WriteString(
-			m.Definition().Name.String() + ": " + m.Definition().Description + "\n",
-		)
-	}
+	// thoughtPromptStringBuilder := strings.Builder{}
+	// thoughtPromptStringBuilder.WriteString("You have to pick an action based on the conversation and the prompt. Describe the full reasoning process for your choice. Here is a list of actions: ")
+	// for _, m := range a.availableActions() {
+	// 	thoughtPromptStringBuilder.WriteString(
+	// 		m.Definition().Name.String() + ": " + m.Definition().Description + "\n",
+	// 	)
+	// }

-	thoughtPromptStringBuilder.WriteString("To not use any action, respond with 'none'")
+	// thoughtPromptStringBuilder.WriteString("To not use any action, respond with 'none'")

-	//thoughtPromptStringBuilder.WriteString("\n\nConversation: " + Messages(c).String())
+	//thoughtPromptStringBuilder.WriteString("\n\nConversation: " + Messages(c).RemoveIf(func(msg openai.ChatCompletionMessage) bool {
+	//	return msg.Role == "system"
+	//}).String())

-	thoughtPrompt := thoughtPromptStringBuilder.String()
+	//thoughtPrompt := thoughtPromptStringBuilder.String()

-	xlog.Debug("[pickAction] thought", "prompt", thoughtPrompt)
+	//thoughtConv := []openai.ChatCompletionMessage{}

 	thought, err := a.askLLM(ctx,
-		append(c, openai.ChatCompletionMessage{
-			Role:    "user",
-			Content: thoughtPrompt,
-		}),
+		c,
 		maxRetries,
 	)
 	if err != nil {
@@ -440,71 +447,89 @@ func (a *Agent) pickAction(ctx context.Context, templ string, messages []openai.
 	}
 	originalReasoning := thought.Content

-	xlog.Debug("[pickAction] thought", "reason", originalReasoning)
-
-	thought, err = a.askLLM(ctx,
-		[]openai.ChatCompletionMessage{
-			{
-				Role:    "system",
-				Content: "Your only objective is to return the string of the action to take based on the user input",
-			},
-			{
-				Role:    "user",
-				Content: "Given the following sentence, answer with the only action to take: " + originalReasoning,
-			}},
-		maxRetries,
-	)
-	if err != nil {
-		return nil, nil, "", err
-	}
-	reason := thought.Content
-
-	xlog.Debug("[pickAction] filtered thought", "reason", reason)
-
 	// From the thought, get the action call
 	// Get all the available actions IDs

-	intentionsTools := action.NewIntention(append(actionsID, "none")...)
+	// By grammar, decide whether we have achieved the goal:
+	//  1. analyze the response and check if the goal is achieved

-	// NOTE: we do not give the full conversation here to pick the action
-	// to avoid hallucinations
 	params, err := a.decision(ctx,
 		[]openai.ChatCompletionMessage{
 			{
 				Role:    "system",
-				Content: "You have to pick the correct action based on the user reasoning",
+				Content: "Extract an action to perform from the following reasoning: ",
 			},
 			{
 				Role:    "user",
-				Content: reason,
-			},
-		},
-		types.Actions{intentionsTools}.ToTools(),
-		intentionsTools.Definition().Name, maxRetries)
+				Content: originalReasoning,
+			}},
+		types.Actions{action.NewGoal()}.ToTools(),
+		action.NewGoal().Definition().Name, maxRetries)
 	if err != nil {
 		return nil, nil, "", fmt.Errorf("failed to get the action tool parameters: %v", err)
 	}

-	actionChoice := action.IntentResponse{}
-
-	if params.actionParams == nil {
-		return nil, nil, params.message, nil
-	}
-
-	err = params.actionParams.Unmarshal(&actionChoice)
+	goalResponse := action.GoalResponse{}
+	err = params.actionParams.Unmarshal(&goalResponse)
 	if err != nil {
 		return nil, nil, "", err
 	}

-	if actionChoice.Tool == "" || actionChoice.Tool == "none" {
+	if goalResponse.Achieved {
+		xlog.Debug("[pickAction] goal achieved", "goal", goalResponse.Goal)
 		return nil, nil, "", nil
 	}

-	// Find the action
-	chosenAction := a.availableActions().Find(actionChoice.Tool)
-	if chosenAction == nil {
-		return nil, nil, "", fmt.Errorf("no action found for intent:" + actionChoice.Tool)
+	// if the goal is not achieved, pick an action
+	xlog.Debug("[pickAction] goal not achieved", "goal", goalResponse.Goal)
+
+	xlog.Debug("[pickAction] thought", "conv", c, "originalReasoning", originalReasoning)
+
+	// TODO: force the selection of an action here
+	// NOTE: we do not give the full conversation here to pick the action
+	// to avoid hallucinations
+	params, err = a.decision(ctx,
+		[]openai.ChatCompletionMessage{
+			{
+				Role:    "system",
+				Content: "Extract an action to perform from the following reasoning: ",
+			},
+			{
+				Role:    "user",
+				Content: originalReasoning,
+			}},
+		a.availableActions().ToTools(),
+		nil, maxRetries)
+	if err != nil {
+		return nil, nil, "", fmt.Errorf("failed to get the action tool parameters: %v", err)
 	}

+	chosenAction := a.availableActions().Find(params.actioName)
+
+	// xlog.Debug("[pickAction] params", "params", params)
+
+	// if params.actionParams == nil {
+	// 	return nil, nil, params.message, nil
+	// }
+
+	// xlog.Debug("[pickAction] actionChoice", "actionChoice", params.actionParams, "message", params.message)
+
+	// actionChoice := action.IntentResponse{}
+
+	// err = params.actionParams.Unmarshal(&actionChoice)
+	// if err != nil {
+	// 	return nil, nil, "", err
+	// }
+
+	// if actionChoice.Tool == "" || actionChoice.Tool == "none" {
+	// 	return nil, nil, "", nil
+	// }
+
+	// // Find the action
+	// chosenAction := a.availableActions().Find(actionChoice.Tool)
+	// if chosenAction == nil {
+	// 	return nil, nil, "", fmt.Errorf("no action found for intent:" + actionChoice.Tool)
+	// }
+
 	return chosenAction, nil, originalReasoning, nil
 }
--- a/core/agent/agent.go
+++ b/core/agent/agent.go
@@ -515,10 +515,21 @@ func (a *Agent) consumeJob(job *types.Job, role string) {
 		//job.Result.Finish(fmt.Errorf("no action to do"))\
 		xlog.Info("No action to do, just reply", "agent", a.Character.Name, "reasoning", reasoning)

-		conv = append(conv, openai.ChatCompletionMessage{
-			Role:    "assistant",
-			Content: reasoning,
-		})
+		if reasoning != "" {
+			conv = append(conv, openai.ChatCompletionMessage{
+				Role:    "assistant",
+				Content: reasoning,
+			})
+		} else {
+			xlog.Info("No reasoning, just reply", "agent", a.Character.Name)
+			msg, err := a.askLLM(job.GetContext(), conv, maxRetries)
+			if err != nil {
+				job.Result.Finish(fmt.Errorf("error asking LLM for a reply: %w", err))
+				return
+			}
+			conv = append(conv, msg)
+			reasoning = msg.Content
+		}

 		xlog.Debug("Finish job with reasoning", "reasoning", reasoning, "agent", a.Character.Name, "conversation", fmt.Sprintf("%+v", conv))
 		job.Result.Conversation = conv
--- a/core/agent/templates.go
+++ b/core/agent/templates.go
@@ -82,11 +82,7 @@ Current State:
 - Short-term Memory: {{range .CurrentState.Memories}}{{.}} {{end}}{{end}}
 Current Time: {{.Time}}`

-const pickSelfTemplate = `Available Tools:
-{{range .Actions -}}
- {{.Name}}: {{.Description }}
-{{ end }}
-
+const pickSelfTemplate = `
 You are an autonomous AI agent with a defined character and state (as shown above).
 Your task is to evaluate your current situation and determine the best course of action.

@@ -108,40 +104,21 @@ Remember:
 - Keep track of your progress and state
 - Be proactive in addressing potential issues

-{{if .Reasoning}}Previous Reasoning: {{.Reasoning}}{{end}}
-` + hudTemplate
-
-const reSelfEvalTemplate = pickSelfTemplate + `
-
-Previous actions have been executed. Evaluate the current situation:
-
-1. Review the outcomes of previous actions
-2. Assess progress toward your goals
-3. Identify any issues or challenges
-4. Determine if additional actions are needed
-
-Consider:
- Success of previous actions
- Changes in the situation
- New information or insights
- Potential next steps
-
-Make a decision about whether to:
- Continue with more actions
- Provide a final response
- Adjust your approach
- Update your goals or state`
-
-const pickActionTemplate = hudTemplate + `
 Available Tools:
 {{range .Actions -}}
 - {{.Name}}: {{.Description }}
 {{ end }}

-Task: Analyze the situation and determine the best course of action.
+{{if .Reasoning}}Previous Reasoning: {{.Reasoning}}{{end}}
+` + hudTemplate
+
+const reSelfEvalTemplate = pickSelfTemplate
+
+const pickActionTemplate = hudTemplate + `
+Your only task is to analyze the situation and determine a goal and the best tool to use, or just a final response if we have fullfilled the goal.

 Guidelines:
-1. Review the current state and context
+1. Review the current state, what was done already and context
 2. Consider available tools and their purposes
 3. Plan your approach carefully
 4. Explain your reasoning clearly
@@ -159,38 +136,11 @@ Decision Process:
 4. Explain your reasoning
 5. Execute the chosen action

+Available Tools:
+{{range .Actions -}}
+- {{.Name}}: {{.Description }}
+{{ end }}
+
 {{if .Reasoning}}Previous Reasoning: {{.Reasoning}}{{end}}`

-const reEvalTemplate = pickActionTemplate + `
-
-Previous actions have been executed. Let's evaluate the current situation:
-
-1. Review Previous Actions:
-   - What actions were taken
-   - What were the results
-   - Any issues or challenges encountered
-
-2. Assess Current State:
-   - Progress toward goals
-   - Changes in the situation
-   - New information or insights
-   - Current challenges or opportunities
-
-3. Determine Next Steps:
-   - Additional tools needed
-   - Final response required
-   - Error handling needed
-   - Approach adjustments required
-
-4. Decision Making:
-   - If task is complete: Use "reply" tool
-   - If errors exist: Address them appropriately
-   - If more actions needed: Explain why and which tools
-   - If situation changed: Adapt your approach
-
-Remember to:
- Consider all available information
- Be specific about next steps
- Explain your reasoning clearly
- Handle errors appropriately
- Provide complete responses when done`
+const reEvalTemplate = pickActionTemplate