feat: improve action picking

Signed-off-by: mudler <mudler@localai.io>
2025-04-11 20:43:48 +02:00
parent 9dad2b0ba4
commit e4271b4d2f
6 changed files with 200 additions and 141 deletions
--- a/core/action/goal.go
+++ b/core/action/goal.go
@@ -0,0 +1,49 @@
 package action
 import (
 	"context"
 	"github.com/mudler/LocalAGI/core/types"
 	"github.com/sashabaranov/go-openai/jsonschema"
 )
 // NewGoal builds a GoalAction, the action whose definition asks whether a
 // goal has been achieved.
 // The strings passed in s are kept, as given, in the action's tools field.
 func NewGoal(s ...string) *GoalAction {
 	goal := new(GoalAction)
 	goal.tools = s
 	return goal
 }
 // GoalAction is the action exposed under the name "goal"; its definition
 // asks for a goal and whether that goal is achieved.
 type GoalAction struct {
 	// tools holds the strings handed to NewGoal. None of the methods
 	// declared in this file read it.
 	tools []string
 }
 // GoalResponse is the JSON shape matching the properties declared by
 // GoalAction.Definition: the goal text and whether it has been achieved.
 type GoalResponse struct {
 	// Goal is the goal being checked (JSON key "goal").
 	Goal     string `json:"goal"`
 	// Achieved reports whether the goal is achieved (JSON key "achieved").
 	Achieved bool   `json:"achieved"`
 }
 // Run is a no-op: it ignores both arguments and always returns the zero
 // ActionResult together with a nil error.
 func (a *GoalAction) Run(_ context.Context, _ types.ActionParams) (types.ActionResult, error) {
 	var result types.ActionResult
 	return result, nil
 }
 // Plannable always reports false for the goal action.
 func (a *GoalAction) Plannable() bool {
 	return false
 }
 // Definition describes the "goal" action: it takes a string property "goal"
 // and a boolean property "achieved", and marks both as required.
 func (a *GoalAction) Definition() types.ActionDefinition {
 	// Schema of the two arguments the action expects.
 	properties := make(map[string]jsonschema.Definition, 2)
 	properties["goal"] = jsonschema.Definition{
 		Type:        jsonschema.String,
 		Description: "The goal to check if it is achieved.",
 	}
 	properties["achieved"] = jsonschema.Definition{
 		Type:        jsonschema.Boolean,
 		Description: "Whether the goal is achieved",
 	}
 	return types.ActionDefinition{
 		Name:        "goal",
 		Description: "Check if the goal is achieved",
 		Properties:  properties,
 		Required:    []string{"goal", "achieved"},
 	}
 }
--- a/core/agent/actions.go
+++ b/core/agent/actions.go
@@ -5,7 +5,6 @@ import (
 	"encoding/json"
 	"fmt"
 	"os"
 	"strings"
 	"github.com/mudler/LocalAGI/core/action"
 	"github.com/mudler/LocalAGI/core/types"
@@ -80,6 +79,15 @@ func (m Messages) ToOpenAI() []openai.ChatCompletionMessage {
 	return []openai.ChatCompletionMessage(m)
 }
 // RemoveIf returns a new Messages containing, in their original order, the
 // messages of m for which f reports false. m itself is left untouched.
 //
 // The result is built in a freshly allocated slice on purpose: deleting in
 // place with append(m[:i], m[i+1:]...) shifts elements inside the backing
 // array that m shares with the caller's slice, so the caller would observe
 // a corrupted conversation (entries shifted and the tail duplicated) after
 // any removal.
 func (m Messages) RemoveIf(f func(msg openai.ChatCompletionMessage) bool) Messages {
 	kept := make(Messages, 0, len(m))
 	for _, msg := range m {
 		if f(msg) {
 			continue
 		}
 		kept = append(kept, msg)
 	}
 	return kept
 }
 func (m Messages) String() string {
 	s := ""
 	for _, cc := range m {
@@ -412,27 +420,26 @@ func (a *Agent) pickAction(ctx context.Context, templ string, messages []openai.
 		actionsID = append(actionsID, m.Definition().Name.String())
 	}
-	thoughtPromptStringBuilder := strings.Builder{}
+	// thoughtPromptStringBuilder := strings.Builder{}
-	thoughtPromptStringBuilder.WriteString("You have to pick an action based on the conversation and the prompt. Describe the full reasoning process for your choice. Here is a list of actions: ")
+	// thoughtPromptStringBuilder.WriteString("You have to pick an action based on the conversation and the prompt. Describe the full reasoning process for your choice. Here is a list of actions: ")
-	for _, m := range a.availableActions() {
+	// for _, m := range a.availableActions() {
-		thoughtPromptStringBuilder.WriteString(
+	// 	thoughtPromptStringBuilder.WriteString(
-			m.Definition().Name.String() + ": " + m.Definition().Description + "\n",
+	// 		m.Definition().Name.String() + ": " + m.Definition().Description + "\n",
-		)
+	// 	)
-	}
+	// }
-	thoughtPromptStringBuilder.WriteString("To not use any action, respond with 'none'")
+	// thoughtPromptStringBuilder.WriteString("To not use any action, respond with 'none'")
-	//thoughtPromptStringBuilder.WriteString("\n\nConversation: " + Messages(c).String())
+	//thoughtPromptStringBuilder.WriteString("\n\nConversation: " + Messages(c).RemoveIf(func(msg openai.ChatCompletionMessage) bool {
 	//	return msg.Role == "system"
 	//}).String())
-	thoughtPrompt := thoughtPromptStringBuilder.String()
+	//thoughtPrompt := thoughtPromptStringBuilder.String()
-	xlog.Debug("[pickAction] thought", "prompt", thoughtPrompt)
+	//thoughtConv := []openai.ChatCompletionMessage{}
 	thought, err := a.askLLM(ctx,
-		append(c, openai.ChatCompletionMessage{
+		c,
 			Role:    "user",
 			Content: thoughtPrompt,
 		}),
 		maxRetries,
 	)
 	if err != nil {
@@ -440,71 +447,89 @@ func (a *Agent) pickAction(ctx context.Context, templ string, messages []openai.
 	}
 	originalReasoning := thought.Content
 	xlog.Debug("[pickAction] thought", "reason", originalReasoning)
 	thought, err = a.askLLM(ctx,
 		[]openai.ChatCompletionMessage{
 			{
 				Role:    "system",
 				Content: "Your only objective is to return the string of the action to take based on the user input",
 			},
 			{
 				Role:    "user",
 				Content: "Given the following sentence, answer with the only action to take: " + originalReasoning,
 			}},
 		maxRetries,
 	)
 	if err != nil {
 		return nil, nil, "", err
 	}
 	reason := thought.Content
 	xlog.Debug("[pickAction] filtered thought", "reason", reason)
 	// From the thought, get the action call
 	// Get all the available actions IDs
-	intentionsTools := action.NewIntention(append(actionsID, "none")...)
+	// by grammar, let's decide if we have achieved the goal
 	//  1. analyze response and check if  goal is achieved
 	// NOTE: we do not give the full conversation here to pick the action
 	// to avoid hallucinations
 	params, err := a.decision(ctx,
 		[]openai.ChatCompletionMessage{
 			{
 				Role:    "system",
-				Content: "You have to pick the correct action based on the user reasoning",
+				Content: "Extract an action to perform from the following reasoning: ",
 			},
 			{
 				Role:    "user",
-				Content: reason,
+				Content: originalReasoning,
-			},
+			}},
-		},
+		types.Actions{action.NewGoal()}.ToTools(),
-		types.Actions{intentionsTools}.ToTools(),
+		action.NewGoal().Definition().Name, maxRetries)
 		intentionsTools.Definition().Name, maxRetries)
 	if err != nil {
 		return nil, nil, "", fmt.Errorf("failed to get the action tool parameters: %v", err)
 	}
-	actionChoice := action.IntentResponse{}
+	goalResponse := action.GoalResponse{}
-
+	err = params.actionParams.Unmarshal(&goalResponse)
 	if params.actionParams == nil {
 		return nil, nil, params.message, nil
 	}
 	err = params.actionParams.Unmarshal(&actionChoice)
 	if err != nil {
 		return nil, nil, "", err
 	}
-	if actionChoice.Tool == "" || actionChoice.Tool == "none" {
+	if goalResponse.Achieved {
 		xlog.Debug("[pickAction] goal achieved", "goal", goalResponse.Goal)
 		return nil, nil, "", nil
 	}
-	// Find the action
+	// if the goal is not achieved, pick an action
-	chosenAction := a.availableActions().Find(actionChoice.Tool)
+	xlog.Debug("[pickAction] goal not achieved", "goal", goalResponse.Goal)
-	if chosenAction == nil {
+
-		return nil, nil, "", fmt.Errorf("no action found for intent:" + actionChoice.Tool)
+	xlog.Debug("[pickAction] thought", "conv", c, "originalReasoning", originalReasoning)
 	// TODO: FORCE to select an action here
 	// NOTE: we do not give the full conversation here to pick the action
 	// to avoid hallucinations
 	params, err = a.decision(ctx,
 		[]openai.ChatCompletionMessage{
 			{
 				Role:    "system",
 				Content: "Extract an action to perform from the following reasoning: ",
 			},
 			{
 				Role:    "user",
 				Content: originalReasoning,
 			}},
 		a.availableActions().ToTools(),
 		nil, maxRetries)
 	if err != nil {
 		return nil, nil, "", fmt.Errorf("failed to get the action tool parameters: %v", err)
 	}
 	chosenAction := a.availableActions().Find(params.actioName)
 	// xlog.Debug("[pickAction] params", "params", params)
 	// if params.actionParams == nil {
 	// 	return nil, nil, params.message, nil
 	// }
 	// xlog.Debug("[pickAction] actionChoice", "actionChoice", params.actionParams, "message", params.message)
 	// actionChoice := action.IntentResponse{}
 	// err = params.actionParams.Unmarshal(&actionChoice)
 	// if err != nil {
 	// 	return nil, nil, "", err
 	// }
 	// if actionChoice.Tool == "" || actionChoice.Tool == "none" {
 	// 	return nil, nil, "", nil
 	// }
 	// // Find the action
 	// chosenAction := a.availableActions().Find(actionChoice.Tool)
 	// if chosenAction == nil {
 	// 	return nil, nil, "", fmt.Errorf("no action found for intent:" + actionChoice.Tool)
 	// }
 	return chosenAction, nil, originalReasoning, nil
 }
--- a/core/agent/agent.go
+++ b/core/agent/agent.go
@@ -515,10 +515,21 @@ func (a *Agent) consumeJob(job *types.Job, role string) {
 		//job.Result.Finish(fmt.Errorf("no action to do"))\
 		xlog.Info("No action to do, just reply", "agent", a.Character.Name, "reasoning", reasoning)
 		if reasoning != "" {
 			conv = append(conv, openai.ChatCompletionMessage{
 				Role:    "assistant",
 				Content: reasoning,
 			})
 		} else {
 			xlog.Info("No reasoning, just reply", "agent", a.Character.Name)
 			msg, err := a.askLLM(job.GetContext(), conv, maxRetries)
 			if err != nil {
 				job.Result.Finish(fmt.Errorf("error asking LLM for a reply: %w", err))
 				return
 			}
 			conv = append(conv, msg)
 			reasoning = msg.Content
 		}
 		xlog.Debug("Finish job with reasoning", "reasoning", reasoning, "agent", a.Character.Name, "conversation", fmt.Sprintf("%+v", conv))
 		job.Result.Conversation = conv
--- a/core/agent/templates.go
+++ b/core/agent/templates.go
@@ -82,11 +82,7 @@ Current State:
 - Short-term Memory: {{range .CurrentState.Memories}}{{.}} {{end}}{{end}}
 Current Time: {{.Time}}`
-const pickSelfTemplate = `Available Tools:
+const pickSelfTemplate = `
 {{range .Actions -}}
 - {{.Name}}: {{.Description }}
 {{ end }}
 You are an autonomous AI agent with a defined character and state (as shown above).
 Your task is to evaluate your current situation and determine the best course of action.
@@ -108,40 +104,21 @@ Remember:
 - Keep track of your progress and state
 - Be proactive in addressing potential issues
 {{if .Reasoning}}Previous Reasoning: {{.Reasoning}}{{end}}
 ` + hudTemplate
 const reSelfEvalTemplate = pickSelfTemplate + `
 Previous actions have been executed. Evaluate the current situation:
 1. Review the outcomes of previous actions
 2. Assess progress toward your goals
 3. Identify any issues or challenges
 4. Determine if additional actions are needed
 Consider:
 - Success of previous actions
 - Changes in the situation
 - New information or insights
 - Potential next steps
 Make a decision about whether to:
 - Continue with more actions
 - Provide a final response
 - Adjust your approach
 - Update your goals or state`
 const pickActionTemplate = hudTemplate + `
 Available Tools:
 {{range .Actions -}}
 - {{.Name}}: {{.Description }}
 {{ end }}
-Task: Analyze the situation and determine the best course of action.
+{{if .Reasoning}}Previous Reasoning: {{.Reasoning}}{{end}}
 ` + hudTemplate
 const reSelfEvalTemplate = pickSelfTemplate
 const pickActionTemplate = hudTemplate + `
 Your only task is to analyze the situation and determine a goal and the best tool to use, or just a final response if we have fullfilled the goal.
 Guidelines:
-1. Review the current state and context
+1. Review the current state, what was done already and context
 2. Consider available tools and their purposes
 3. Plan your approach carefully
 4. Explain your reasoning clearly
@@ -159,38 +136,11 @@ Decision Process:
 4. Explain your reasoning
 5. Execute the chosen action
 Available Tools:
 {{range .Actions -}}
 - {{.Name}}: {{.Description }}
 {{ end }}
 {{if .Reasoning}}Previous Reasoning: {{.Reasoning}}{{end}}`
-const reEvalTemplate = pickActionTemplate + `
+const reEvalTemplate = pickActionTemplate
 Previous actions have been executed. Let's evaluate the current situation:
 1. Review Previous Actions:
   - What actions were taken
   - What were the results
   - Any issues or challenges encountered
 2. Assess Current State:
   - Progress toward goals
   - Changes in the situation
   - New information or insights
   - Current challenges or opportunities
 3. Determine Next Steps:
   - Additional tools needed
   - Final response required
   - Error handling needed
   - Approach adjustments required
 4. Decision Making:
   - If task is complete: Use "reply" tool
   - If errors exist: Address them appropriately
   - If more actions needed: Explain why and which tools
   - If situation changed: Adapt your approach
 Remember to:
 - Consider all available information
 - Be specific about next steps
 - Explain your reasoning clearly
 - Handle errors appropriately
 - Provide complete responses when done`
--- a/services/actions/githubprreader.go
+++ b/services/actions/githubprreader.go
@@ -64,20 +64,44 @@ func (g *GithubPRReader) Run(ctx context.Context, params types.ActionParams) (ty
 		return types.ActionResult{Result: fmt.Sprintf("Error fetching pull request files: %s", err.Error())}, err
 	}
 	// Get CI status information
 	ciStatus := "\n\nCI Status:\n"
 	// Get PR status checks
 	checkRuns, _, err := g.client.Checks.ListCheckRunsForRef(ctx, result.Owner, result.Repository, pr.GetHead().GetSHA(), &github.ListCheckRunsOptions{})
 	if err == nil && checkRuns != nil {
 		ciStatus += fmt.Sprintf("\nPR Status Checks:\n")
 		ciStatus += fmt.Sprintf("Total Checks: %d\n", checkRuns.GetTotal())
 		for _, check := range checkRuns.CheckRuns {
 			ciStatus += fmt.Sprintf("- %s: %s (%s)\n",
 				check.GetName(),
 				check.GetConclusion(),
 				check.GetStatus())
 		}
 	}
 	// Build the file changes summary with patches
 	fileChanges := "\n\nFile Changes:\n"
 	for _, file := range files {
-		fileChanges += fmt.Sprintf("\n--- %s\n+++ %s\n", *file.Filename, *file.Filename)
+		fileChanges += fmt.Sprintf("\n--- %s\n+++ %s\n", file.GetFilename(), file.GetFilename())
-		if g.showFullDiff && file.Patch != nil {
+		if g.showFullDiff && file.GetPatch() != "" {
-			fileChanges += *file.Patch
+			fileChanges += file.GetPatch()
 		}
-		fileChanges += fmt.Sprintf("\n(%d additions, %d deletions)\n", *file.Additions, *file.Deletions)
+		fileChanges += fmt.Sprintf("\n(%d additions, %d deletions)\n", file.GetAdditions(), file.GetDeletions())
 	}
 	return types.ActionResult{
 		Result: fmt.Sprintf(
-			"Pull Request %d Repository: %s\nTitle: %s\nBody: %s\nState: %s\nBase: %s\nHead: %s%s",
+			"Pull Request %d Repository: %s\nTitle: %s\nBody: %s\nState: %s\nBase: %s\nHead: %s%s%s",
-			*pr.Number, *pr.Base.Repo.FullName, *pr.Title, *pr.Body, *pr.State, *pr.Base.Ref, *pr.Head.Ref, fileChanges)}, nil
+			pr.GetNumber(),
 			pr.GetBase().GetRepo().GetFullName(),
 			pr.GetTitle(),
 			pr.GetBody(),
 			pr.GetState(),
 			pr.GetBase().GetRef(),
 			pr.GetHead().GetRef(),
 			ciStatus,
 			fileChanges)}, nil
 }
 func (g *GithubPRReader) Definition() types.ActionDefinition {
--- a/services/actions/githubprreviewer.go
+++ b/services/actions/githubprreviewer.go
@@ -127,15 +127,15 @@ func (g *GithubPRReviewer) Run(ctx context.Context, params types.ActionParams) (
 		return types.ActionResult{Result: errorDetails}, err
 	}
-	actionResult := strings.Builder{}
+	actionResult := fmt.Sprintf(
 		"Pull request https://github.com/%s/%s/pull/%d reviewed successfully with status: %s",
 		result.Owner,
 		result.Repository,
 		result.PRNumber,
 		strings.ToLower(result.ReviewAction),
 	)
-	actionResult.WriteString("========================== PR Reviewer Result ==========================\n")
+	return types.ActionResult{Result: actionResult}, nil
 	actionResult.WriteString(fmt.Sprintf("Pull request https://github.com/%s/%s/pull/%d", result.Owner, result.Repository, result.PRNumber))
 	actionResult.WriteString("\n")
 	actionResult.WriteString(fmt.Sprintf("Successfully reviewed with status: %s", strings.ToLower(result.ReviewAction)))
 	actionResult.WriteString("\n")
 	return types.ActionResult{Result: actionResult.String()}, nil
 }
 func (g *GithubPRReviewer) Definition() types.ActionDefinition {