chore: cleanup, identify goal from conversation when evaluating achievement (#29)

* chore: cleanup, identify goal from conversation when evaluating achievement Signed-off-by: mudler <mudler@localai.io> * change base cpu model Signed-off-by: mudler <mudler@localai.io> * this is not necessary anymore Signed-off-by: mudler <mudler@localai.io> * use 12b Signed-off-by: mudler <mudler@localai.io> * use openthinker, it's smaller * chore(tests): set timeout Signed-off-by: mudler <mudler@localai.io> * Enable reasoning in some of the tests Signed-off-by: mudler <mudler@localai.io> * docker compose unification, small changes Signed-off-by: mudler <mudler@localai.io> * Simplify Signed-off-by: mudler <mudler@localai.io> * Back at arcee-agent as default Signed-off-by: mudler <mudler@localai.io> * Better error handling during planning Signed-off-by: mudler <mudler@localai.io> * Ci: do not run jobs for every branch Signed-off-by: mudler <mudler@localai.io> --------- Signed-off-by: mudler <mudler@localai.io>
2025-04-12 21:01:01 +02:00
parent 209a9989c4
commit 60c249f19a
12 changed files with 267 additions and 311 deletions
--- a/core/action/goal.go
+++ b/core/action/goal.go
@@ -10,12 +10,11 @@ import (
 // NewGoal creates a new intention action
 // The inention action is special as it tries to identify
 // a tool to use and a reasoning over to use it
-func NewGoal(s ...string) *GoalAction {
-	return &GoalAction{tools: s}
+func NewGoal() *GoalAction {
+	return &GoalAction{}
 }

 type GoalAction struct {
-	tools []string
 }
 type GoalResponse struct {
 	Goal     string `json:"goal"`
--- a/core/action/plan.go
+++ b/core/action/plan.go
@@ -41,7 +41,7 @@ func (a *PlanAction) Plannable() bool {
 func (a *PlanAction) Definition() types.ActionDefinition {
 	return types.ActionDefinition{
 		Name:        PlanActionName,
-		Description: "Use this tool for solving complex tasks that involves calling more tools in sequence.",
+		Description: "Use it for situations that involves doing more actions in sequence.",
 		Properties: map[string]jsonschema.Definition{
 			"subtasks": {
 				Type:        jsonschema.Array,
--- a/core/agent/actions.go
+++ b/core/agent/actions.go
@@ -24,15 +24,27 @@ type decisionResult struct {
 func (a *Agent) decision(
 	ctx context.Context,
 	conversation []openai.ChatCompletionMessage,
-	tools []openai.Tool, toolchoice any, maxRetries int) (*decisionResult, error) {
+	tools []openai.Tool, toolchoice string, maxRetries int) (*decisionResult, error) {
+
+	var choice *openai.ToolChoice
+
+	if toolchoice != "" {
+		choice = &openai.ToolChoice{
+			Type:     openai.ToolTypeFunction,
+			Function: openai.ToolFunction{Name: toolchoice},
+		}
+	}

 	var lastErr error
 	for attempts := 0; attempts < maxRetries; attempts++ {
 		decision := openai.ChatCompletionRequest{
-			Model:      a.options.LLMAPI.Model,
-			Messages:   conversation,
-			Tools:      tools,
-			ToolChoice: toolchoice,
+			Model:    a.options.LLMAPI.Model,
+			Messages: conversation,
+			Tools:    tools,
+		}
+
+		if choice != nil {
+			decision.ToolChoice = *choice
 		}

 		resp, err := a.client.CreateChatCompletion(ctx, decision)
@@ -42,6 +54,9 @@ func (a *Agent) decision(
 			continue
 		}

+		jsonResp, _ := json.Marshal(resp)
+		xlog.Debug("Decision response", "response", string(jsonResp))
+
 		if len(resp.Choices) != 1 {
 			lastErr = fmt.Errorf("no choices: %d", len(resp.Choices))
 			xlog.Warn("Attempt to make a decision failed", "attempt", attempts+1, "error", lastErr)
@@ -189,10 +204,7 @@ func (a *Agent) generateParameters(ctx context.Context, pickTemplate string, act
 		result, attemptErr = a.decision(ctx,
 			cc,
 			a.availableActions().ToTools(),
-			openai.ToolChoice{
-				Type:     openai.ToolTypeFunction,
-				Function: openai.ToolFunction{Name: act.Definition().Name.String()},
-			},
+			act.Definition().Name.String(),
 			maxAttempts,
 		)
 		if attemptErr == nil && result.actionParams != nil {
@@ -253,6 +265,7 @@ func (a *Agent) handlePlanning(ctx context.Context, job *types.Job, chosenAction

 		params, err := a.generateParameters(ctx, pickTemplate, subTaskAction, conv, subTaskReasoning, maxRetries)
 		if err != nil {
+			xlog.Error("error generating action's parameters", "error", err)
 			return conv, fmt.Errorf("error generating action's parameters: %w", err)

 		}
@@ -282,6 +295,7 @@ func (a *Agent) handlePlanning(ctx context.Context, job *types.Job, chosenAction

 		result, err := a.runAction(ctx, subTaskAction, actionParams)
 		if err != nil {
+			xlog.Error("error running action", "error", err)
 			return conv, fmt.Errorf("error running action: %w", err)
 		}

@@ -367,7 +381,9 @@ func (a *Agent) prepareHUD() (promptHUD *PromptHUD) {
 func (a *Agent) pickAction(ctx context.Context, templ string, messages []openai.ChatCompletionMessage, maxRetries int) (types.Action, types.ActionParams, string, error) {
 	c := messages

-	xlog.Debug("[pickAction] picking action", "messages", messages)
+	xlog.Debug("[pickAction] picking action starts", "messages", messages)
+
+	// Identify the goal of this conversation

 	if !a.options.forceReasoning {
 		xlog.Debug("not forcing reasoning")
@@ -376,7 +392,7 @@ func (a *Agent) pickAction(ctx context.Context, templ string, messages []openai.
 		thought, err := a.decision(ctx,
 			messages,
 			a.availableActions().ToTools(),
-			nil,
+			"",
 			maxRetries)
 		if err != nil {
 			return nil, nil, "", err
@@ -415,120 +431,83 @@ func (a *Agent) pickAction(ctx context.Context, templ string, messages []openai.
 		}, c...)
 	}

-	actionsID := []string{}
+	thought, err := a.decision(ctx,
+		c,
+		types.Actions{action.NewReasoning()}.ToTools(),
+		action.NewReasoning().Definition().Name.String(), maxRetries)
+	if err != nil {
+		return nil, nil, "", err
+	}
+	originalReasoning := ""
+	response := &action.ReasoningResponse{}
+	if thought.actionParams != nil {
+		if err := thought.actionParams.Unmarshal(response); err != nil {
+			return nil, nil, "", err
+		}
+		originalReasoning = response.Reasoning
+	}
+	if thought.message != "" {
+		originalReasoning = thought.message
+	}
+
+	xlog.Debug("[pickAction] picking action", "messages", c)
+	// thought, err := a.askLLM(ctx,
+	// 	c,
+
+	actionsID := []string{"reply"}
 	for _, m := range a.availableActions() {
 		actionsID = append(actionsID, m.Definition().Name.String())
 	}

-	// thoughtPromptStringBuilder := strings.Builder{}
-	// thoughtPromptStringBuilder.WriteString("You have to pick an action based on the conversation and the prompt. Describe the full reasoning process for your choice. Here is a list of actions: ")
-	// for _, m := range a.availableActions() {
-	// 	thoughtPromptStringBuilder.WriteString(
-	// 		m.Definition().Name.String() + ": " + m.Definition().Description + "\n",
-	// 	)
-	// }
-
-	// thoughtPromptStringBuilder.WriteString("To not use any action, respond with 'none'")
-
-	//thoughtPromptStringBuilder.WriteString("\n\nConversation: " + Messages(c).RemoveIf(func(msg openai.ChatCompletionMessage) bool {
-	//	return msg.Role == "system"
-	//}).String())
-
-	//thoughtPrompt := thoughtPromptStringBuilder.String()
-
-	//thoughtConv := []openai.ChatCompletionMessage{}
-
-	thought, err := a.askLLM(ctx,
-		c,
-		maxRetries,
-	)
-	if err != nil {
-		return nil, nil, "", err
-	}
-	originalReasoning := thought.Content
-
-	// From the thought, get the action call
-	// Get all the available actions IDs
-
-	// by grammar, let's decide if we have achieved the goal
-	//  1. analyze response and check if  goal is achieved
-
-	params, err := a.decision(ctx,
-		[]openai.ChatCompletionMessage{
-			{
-				Role:    "system",
-				Content: "Extract an action to perform from the following reasoning: ",
-			},
-			{
-				Role:    "user",
-				Content: originalReasoning,
-			}},
-		types.Actions{action.NewGoal()}.ToTools(),
-		action.NewGoal().Definition().Name, maxRetries)
-	if err != nil {
-		return nil, nil, "", fmt.Errorf("failed to get the action tool parameters: %v", err)
-	}
-
-	goalResponse := action.GoalResponse{}
-	err = params.actionParams.Unmarshal(&goalResponse)
-	if err != nil {
-		return nil, nil, "", err
-	}
-
-	if goalResponse.Achieved {
-		xlog.Debug("[pickAction] goal achieved", "goal", goalResponse.Goal)
-		return nil, nil, "", nil
-	}
-
-	// if the goal is not achieved, pick an action
-	xlog.Debug("[pickAction] goal not achieved", "goal", goalResponse.Goal)
-
-	xlog.Debug("[pickAction] thought", "conv", c, "originalReasoning", originalReasoning)
+	xlog.Debug("[pickAction] actionsID", "actionsID", actionsID)

+	intentionsTools := action.NewIntention(actionsID...)
 	// TODO: FORCE to select ana ction here
 	// NOTE: we do not give the full conversation here to pick the action
 	// to avoid hallucinations
-	params, err = a.decision(ctx,
-		[]openai.ChatCompletionMessage{
-			{
-				Role:    "system",
-				Content: "Extract an action to perform from the following reasoning: ",
-			},
-			{
-				Role:    "user",
-				Content: originalReasoning,
-			}},
-		a.availableActions().ToTools(),
-		nil, maxRetries)
+
+	// Extract an action
+	params, err := a.decision(ctx,
+		append(c, openai.ChatCompletionMessage{
+			Role:    "system",
+			Content: "Pick the relevant action given the following reasoning: " + originalReasoning,
+		}),
+		types.Actions{intentionsTools}.ToTools(),
+		intentionsTools.Definition().Name.String(), maxRetries)
 	if err != nil {
 		return nil, nil, "", fmt.Errorf("failed to get the action tool parameters: %v", err)
 	}

-	chosenAction := a.availableActions().Find(params.actioName)
+	if params.actionParams == nil {
+		xlog.Debug("[pickAction] no action params found")
+		return nil, nil, params.message, nil
+	}

-	// xlog.Debug("[pickAction] params", "params", params)
+	actionChoice := action.IntentResponse{}
+	err = params.actionParams.Unmarshal(&actionChoice)
+	if err != nil {
+		return nil, nil, "", err
+	}

-	// if params.actionParams == nil {
-	// 	return nil, nil, params.message, nil
-	// }
+	if actionChoice.Tool == "" || actionChoice.Tool == "reply" {
+		xlog.Debug("[pickAction] no action found, replying")
+		return nil, nil, "", nil
+	}

-	// xlog.Debug("[pickAction] actionChoice", "actionChoice", params.actionParams, "message", params.message)
+	chosenAction := a.availableActions().Find(actionChoice.Tool)

-	// actionChoice := action.IntentResponse{}
+	xlog.Debug("[pickAction] chosenAction", "chosenAction", chosenAction, "actionName", actionChoice.Tool)

-	// err = params.actionParams.Unmarshal(&actionChoice)
-	// if err != nil {
-	// 	return nil, nil, "", err
-	// }
+	// // Let's double check if the action is correct by asking the LLM to judge it

-	// if actionChoice.Tool == "" || actionChoice.Tool == "none" {
-	// 	return nil, nil, "", nil
-	// }
+	// if chosenAction!= nil {
+	// 	promptString:= "Given the following goal and thoughts, is the action correct? \n\n"
+	// 	promptString+= fmt.Sprintf("Goal: %s\n", goalResponse.Goal)
+	// 	promptString+= fmt.Sprintf("Thoughts: %s\n", originalReasoning)
+	// 	promptString+= fmt.Sprintf("Action: %s\n", chosenAction.Definition().Name.String())
+	// 	promptString+= fmt.Sprintf("Action description: %s\n", chosenAction.Definition().Description)
+	// 	promptString+= fmt.Sprintf("Action parameters: %s\n", params.actionParams)

-	// // Find the action
-	// chosenAction := a.availableActions().Find(actionChoice.Tool)
-	// if chosenAction == nil {
-	// 	return nil, nil, "", fmt.Errorf("no action found for intent:" + actionChoice.Tool)
 	// }

 	return chosenAction, nil, originalReasoning, nil
--- a/core/agent/agent.go
+++ b/core/agent/agent.go
@@ -249,7 +249,7 @@ func (a *Agent) runAction(ctx context.Context, chosenAction types.Action, params
 		}
 	}

-	xlog.Info("Running action", "action", chosenAction.Definition().Name, "agent", a.Character.Name)
+	xlog.Info("[runAction] Running action", "action", chosenAction.Definition().Name, "agent", a.Character.Name, "params", params.String())

 	if chosenAction.Definition().Name.Is(action.StateActionName) {
 		// We need to store the result in the state
@@ -270,6 +270,8 @@ func (a *Agent) runAction(ctx context.Context, chosenAction types.Action, params
 		}
 	}

+	xlog.Debug("[runAction] Action result", "action", chosenAction.Definition().Name, "params", params.String(), "result", result.Result)
+	
 	return result, nil
 }

@@ -603,7 +605,13 @@ func (a *Agent) consumeJob(job *types.Job, role string) {
 	var err error
 	conv, err = a.handlePlanning(job.GetContext(), job, chosenAction, actionParams, reasoning, pickTemplate, conv)
 	if err != nil {
-		job.Result.Finish(fmt.Errorf("error running action: %w", err))
+		xlog.Error("error handling planning", "error", err)
+		//job.Result.Conversation = conv
+		//job.Result.SetResponse(msg.Content)
+		a.reply(job, role, append(conv, openai.ChatCompletionMessage{
+			Role:    "assistant",
+			Content: fmt.Sprintf("Error handling planning: %v", err),
+		}), actionParams, chosenAction, reasoning)
 		return
 	}

@@ -689,26 +697,6 @@ func (a *Agent) consumeJob(job *types.Job, role string) {
 		job.SetNextAction(&followingAction, &followingParams, reasoning)
 		a.consumeJob(job, role)
 		return
-	} else if followingAction == nil {
-		xlog.Info("Not following another action", "agent", a.Character.Name)
-
-		if !a.options.forceReasoning {
-			xlog.Info("Finish conversation with reasoning", "reasoning", reasoning, "agent", a.Character.Name)
-
-			msg := openai.ChatCompletionMessage{
-				Role:    "assistant",
-				Content: reasoning,
-			}
-
-			conv = append(conv, msg)
-			job.Result.SetResponse(msg.Content)
-			job.Result.Conversation = conv
-			job.Result.AddFinalizer(func(conv []openai.ChatCompletionMessage) {
-				a.saveCurrentConversation(conv)
-			})
-			job.Result.Finish(nil)
-			return
-		}
 	}

 	a.reply(job, role, conv, actionParams, chosenAction, reasoning)
--- a/core/agent/agent_test.go
+++ b/core/agent/agent_test.go
@@ -126,6 +126,8 @@ var _ = Describe("Agent test", func() {
 			agent, err := New(
 				WithLLMAPIURL(apiURL),
 				WithModel(testModel),
+				EnableForceReasoning,
+				WithTimeout("10m"),
 				WithLoopDetectionSteps(3),
 				//	WithRandomIdentity(),
 				WithActions(&TestAction{response: map[string]string{
@@ -174,7 +176,7 @@ var _ = Describe("Agent test", func() {
 			agent, err := New(
 				WithLLMAPIURL(apiURL),
 				WithModel(testModel),
-
+				WithTimeout("10m"),
 				//	WithRandomIdentity(),
 				WithActions(&TestAction{response: map[string]string{
 					"boston": testActionResult,
@@ -199,6 +201,7 @@ var _ = Describe("Agent test", func() {
 			agent, err := New(
 				WithLLMAPIURL(apiURL),
 				WithModel(testModel),
+				WithTimeout("10m"),
 				EnableHUD,
 				//	EnableStandaloneJob,
 				//	WithRandomIdentity(),
--- a/core/agent/templates.go
+++ b/core/agent/templates.go
@@ -115,7 +115,7 @@ Available Tools:
 const reSelfEvalTemplate = pickSelfTemplate

 const pickActionTemplate = hudTemplate + `
-Your only task is to analyze the situation and determine a goal and the best tool to use, or just a final response if we have fullfilled the goal.
+Your only task is to analyze the conversation and determine a goal and the best tool to use, or just a final response if we have fullfilled the goal.

 Guidelines:
 1. Review the current state, what was done already and context