Add Github reviewer and improve reasoning (#27)

* Add Github reviewer and improve reasoning * feat: improve action picking Signed-off-by: mudler <mudler@localai.io> --------- Signed-off-by: mudler <mudler@localai.io>
2025-04-11 21:57:19 +02:00
parent e4c7d1acfc
commit 5105b46f48
8 changed files with 539 additions and 314 deletions
--- a/core/agent/actions.go
+++ b/core/agent/actions.go
@@ -79,6 +79,15 @@ func (m Messages) ToOpenAI() []openai.ChatCompletionMessage {
 	return []openai.ChatCompletionMessage(m)
 }

+// RemoveIf returns a new Messages holding, in their original order, only the
+// entries of m for which f reports false.
+//
+// The result is built in a freshly allocated slice rather than by compacting m
+// in place: deleting with append(m[:i], m[i+1:]...) shifts elements inside the
+// shared backing array, which silently corrupts the caller's original slice
+// (it keeps its old length and ends up with duplicated trailing entries).
+func (m Messages) RemoveIf(f func(msg openai.ChatCompletionMessage) bool) Messages {
+	kept := make(Messages, 0, len(m))
+	for _, msg := range m {
+		if !f(msg) {
+			kept = append(kept, msg)
+		}
+	}
+	return kept
+}
+
 func (m Messages) String() string {
 	s := ""
 	for _, cc := range m {
@@ -358,7 +367,7 @@ func (a *Agent) prepareHUD() (promptHUD *PromptHUD) {
 func (a *Agent) pickAction(ctx context.Context, templ string, messages []openai.ChatCompletionMessage, maxRetries int) (types.Action, types.ActionParams, string, error) {
 	c := messages

-	xlog.Debug("picking action", "messages", messages)
+	xlog.Debug("[pickAction] picking action", "messages", messages)

 	if !a.options.forceReasoning {
 		xlog.Debug("not forcing reasoning")
@@ -389,7 +398,7 @@ func (a *Agent) pickAction(ctx context.Context, templ string, messages []openai.
 		return chosenAction, thought.actionParams, thought.message, nil
 	}

-	xlog.Debug("forcing reasoning")
+	xlog.Debug("[pickAction] forcing reasoning")

 	prompt, err := renderTemplate(templ, a.prepareHUD(), a.availableActions(), "")
 	if err != nil {
@@ -406,71 +415,121 @@ func (a *Agent) pickAction(ctx context.Context, templ string, messages []openai.
 		}, c...)
 	}

-	// We also could avoid to use functions here and get just a reply from the LLM
-	// and then use the reply to get the action
-	thought, err := a.decision(ctx,
-		c,
-		types.Actions{action.NewReasoning()}.ToTools(),
-		action.NewReasoning().Definition().Name, maxRetries)
-	if err != nil {
-		return nil, nil, "", err
-	}
-	reason := ""
-	response := &action.ReasoningResponse{}
-	if thought.actionParams != nil {
-		if err := thought.actionParams.Unmarshal(response); err != nil {
-			return nil, nil, "", err
-		}
-		reason = response.Reasoning
-	}
-	if thought.message != "" {
-		reason = thought.message
-	}
-
-	xlog.Debug("thought", "reason", reason)
-
-	// From the thought, get the action call
-	// Get all the available actions IDs
 	actionsID := []string{}
 	for _, m := range a.availableActions() {
 		actionsID = append(actionsID, m.Definition().Name.String())
 	}
-	intentionsTools := action.NewIntention(actionsID...)

-	// NOTE: we do not give the full conversation here to pick the action
-	// to avoid hallucinations
+	// thoughtPromptStringBuilder := strings.Builder{}
+	// thoughtPromptStringBuilder.WriteString("You have to pick an action based on the conversation and the prompt. Describe the full reasoning process for your choice. Here is a list of actions: ")
+	// for _, m := range a.availableActions() {
+	// 	thoughtPromptStringBuilder.WriteString(
+	// 		m.Definition().Name.String() + ": " + m.Definition().Description + "\n",
+	// 	)
+	// }
+
+	// thoughtPromptStringBuilder.WriteString("To not use any action, respond with 'none'")
+
+	//thoughtPromptStringBuilder.WriteString("\n\nConversation: " + Messages(c).RemoveIf(func(msg openai.ChatCompletionMessage) bool {
+	//	return msg.Role == "system"
+	//}).String())
+
+	//thoughtPrompt := thoughtPromptStringBuilder.String()
+
+	//thoughtConv := []openai.ChatCompletionMessage{}
+
+	thought, err := a.askLLM(ctx,
+		c,
+		maxRetries,
+	)
+	if err != nil {
+		return nil, nil, "", err
+	}
+	originalReasoning := thought.Content
+
+	// From the thought, get the action call
+	// Get all the available actions IDs
+
+	// by grammar, let's decide if we have achieved the goal
+	//  1. analyze response and check if  goal is achieved
+
 	params, err := a.decision(ctx,
-		[]openai.ChatCompletionMessage{{
-			Role:    "assistant",
-			Content: reason,
-		},
-		},
-		types.Actions{intentionsTools}.ToTools(),
-		intentionsTools.Definition().Name, maxRetries)
+		[]openai.ChatCompletionMessage{
+			{
+				Role:    "system",
+				Content: "Extract an action to perform from the following reasoning: ",
+			},
+			{
+				Role:    "user",
+				Content: originalReasoning,
+			}},
+		types.Actions{action.NewGoal()}.ToTools(),
+		action.NewGoal().Definition().Name, maxRetries)
 	if err != nil {
 		return nil, nil, "", fmt.Errorf("failed to get the action tool parameters: %v", err)
 	}

-	actionChoice := action.IntentResponse{}
-
-	if params.actionParams == nil {
-		return nil, nil, params.message, nil
-	}
-
-	err = params.actionParams.Unmarshal(&actionChoice)
+	goalResponse := action.GoalResponse{}
+	err = params.actionParams.Unmarshal(&goalResponse)
 	if err != nil {
 		return nil, nil, "", err
 	}

-	if actionChoice.Tool == "" || actionChoice.Tool == "none" {
-		return nil, nil, "", fmt.Errorf("no intent detected")
+	if goalResponse.Achieved {
+		xlog.Debug("[pickAction] goal achieved", "goal", goalResponse.Goal)
+		return nil, nil, "", nil
 	}

-	// Find the action
-	chosenAction := a.availableActions().Find(actionChoice.Tool)
-	if chosenAction == nil {
-		return nil, nil, "", fmt.Errorf("no action found for intent:" + actionChoice.Tool)
+	// if the goal is not achieved, pick an action
+	xlog.Debug("[pickAction] goal not achieved", "goal", goalResponse.Goal)
+
+	xlog.Debug("[pickAction] thought", "conv", c, "originalReasoning", originalReasoning)
+
+	// TODO: FORCE to select an action here
+	// NOTE: we do not give the full conversation here to pick the action
+	// to avoid hallucinations
+	params, err = a.decision(ctx,
+		[]openai.ChatCompletionMessage{
+			{
+				Role:    "system",
+				Content: "Extract an action to perform from the following reasoning: ",
+			},
+			{
+				Role:    "user",
+				Content: originalReasoning,
+			}},
+		a.availableActions().ToTools(),
+		nil, maxRetries)
+	if err != nil {
+		return nil, nil, "", fmt.Errorf("failed to get the action tool parameters: %v", err)
 	}

-	return chosenAction, nil, actionChoice.Reasoning, nil
+	chosenAction := a.availableActions().Find(params.actioName)
+
+	// xlog.Debug("[pickAction] params", "params", params)
+
+	// if params.actionParams == nil {
+	// 	return nil, nil, params.message, nil
+	// }
+
+	// xlog.Debug("[pickAction] actionChoice", "actionChoice", params.actionParams, "message", params.message)
+
+	// actionChoice := action.IntentResponse{}
+
+	// err = params.actionParams.Unmarshal(&actionChoice)
+	// if err != nil {
+	// 	return nil, nil, "", err
+	// }
+
+	// if actionChoice.Tool == "" || actionChoice.Tool == "none" {
+	// 	return nil, nil, "", nil
+	// }
+
+	// // Find the action
+	// chosenAction := a.availableActions().Find(actionChoice.Tool)
+	// if chosenAction == nil {
+	// 	return nil, nil, "", fmt.Errorf("no action found for intent:" + actionChoice.Tool)
+	// }
+
+	return chosenAction, nil, originalReasoning, nil
 }
--- a/core/agent/agent.go
+++ b/core/agent/agent.go
@@ -515,10 +515,21 @@ func (a *Agent) consumeJob(job *types.Job, role string) {
 		//job.Result.Finish(fmt.Errorf("no action to do"))\
 		xlog.Info("No action to do, just reply", "agent", a.Character.Name, "reasoning", reasoning)

-		conv = append(conv, openai.ChatCompletionMessage{
-			Role:    "assistant",
-			Content: reasoning,
-		})
+		if reasoning != "" {
+			conv = append(conv, openai.ChatCompletionMessage{
+				Role:    "assistant",
+				Content: reasoning,
+			})
+		} else {
+			xlog.Info("No reasoning, just reply", "agent", a.Character.Name)
+			msg, err := a.askLLM(job.GetContext(), conv, maxRetries)
+			if err != nil {
+				job.Result.Finish(fmt.Errorf("error asking LLM for a reply: %w", err))
+				return
+			}
+			conv = append(conv, msg)
+			reasoning = msg.Content
+		}

 		xlog.Debug("Finish job with reasoning", "reasoning", reasoning, "agent", a.Character.Name, "conversation", fmt.Sprintf("%+v", conv))
 		job.Result.Conversation = conv
@@ -670,6 +681,7 @@ func (a *Agent) consumeJob(job *types.Job, role string) {
 		!chosenAction.Definition().Name.Is(action.ReplyActionName) {

 		xlog.Info("Following action", "action", followingAction.Definition().Name, "agent", a.Character.Name)
+		job.ConversationHistory = conv

 		// We need to do another action (?)
 		// The agent decided to do another action
--- a/core/agent/templates.go
+++ b/core/agent/templates.go
@@ -82,11 +82,7 @@ Current State:
 - Short-term Memory: {{range .CurrentState.Memories}}{{.}} {{end}}{{end}}
 Current Time: {{.Time}}`

-const pickSelfTemplate = `Available Tools:
-{{range .Actions -}}
- {{.Name}}: {{.Description }}
-{{ end }}
-
+const pickSelfTemplate = `
 You are an autonomous AI agent with a defined character and state (as shown above).
 Your task is to evaluate your current situation and determine the best course of action.

@@ -108,40 +104,21 @@ Remember:
 - Keep track of your progress and state
 - Be proactive in addressing potential issues

-{{if .Reasoning}}Previous Reasoning: {{.Reasoning}}{{end}}
-` + hudTemplate
-
-const reSelfEvalTemplate = pickSelfTemplate + `
-
-Previous actions have been executed. Evaluate the current situation:
-
-1. Review the outcomes of previous actions
-2. Assess progress toward your goals
-3. Identify any issues or challenges
-4. Determine if additional actions are needed
-
-Consider:
- Success of previous actions
- Changes in the situation
- New information or insights
- Potential next steps
-
-Make a decision about whether to:
- Continue with more actions
- Provide a final response
- Adjust your approach
- Update your goals or state`
-
-const pickActionTemplate = hudTemplate + `
 Available Tools:
 {{range .Actions -}}
 - {{.Name}}: {{.Description }}
 {{ end }}

-Task: Analyze the situation and determine the best course of action.
+{{if .Reasoning}}Previous Reasoning: {{.Reasoning}}{{end}}
+` + hudTemplate
+
+const reSelfEvalTemplate = pickSelfTemplate
+
+const pickActionTemplate = hudTemplate + `
+Your only task is to analyze the situation and determine a goal and the best tool to use, or just a final response if we have fullfilled the goal.

 Guidelines:
-1. Review the current state and context
+1. Review the current state, what was done already and context
 2. Consider available tools and their purposes
 3. Plan your approach carefully
 4. Explain your reasoning clearly
@@ -159,38 +136,11 @@ Decision Process:
 4. Explain your reasoning
 5. Execute the chosen action

+Available Tools:
+{{range .Actions -}}
+- {{.Name}}: {{.Description }}
+{{ end }}
+
 {{if .Reasoning}}Previous Reasoning: {{.Reasoning}}{{end}}`

-const reEvalTemplate = pickActionTemplate + `
-
-Previous actions have been executed. Let's evaluate the current situation:
-
-1. Review Previous Actions:
-   - What actions were taken
-   - What were the results
-   - Any issues or challenges encountered
-
-2. Assess Current State:
-   - Progress toward goals
-   - Changes in the situation
-   - New information or insights
-   - Current challenges or opportunities
-
-3. Determine Next Steps:
-   - Additional tools needed
-   - Final response required
-   - Error handling needed
-   - Approach adjustments required
-
-4. Decision Making:
-   - If task is complete: Use "reply" tool
-   - If errors exist: Address them appropriately
-   - If more actions needed: Explain why and which tools
-   - If situation changed: Adapt your approach
-
-Remember to:
- Consider all available information
- Be specific about next steps
- Explain your reasoning clearly
- Handle errors appropriately
- Provide complete responses when done`
+const reEvalTemplate = pickActionTemplate