feat(evaluation): add deep evaluation mechanism (#145)

* feat(evaluation): add deep evaluation mechanism

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* consider whole conversation when evaluating

Signed-off-by: mudler <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: mudler <mudler@localai.io>
2025-05-11 18:31:04 +02:00
parent 289edb67a6
commit e431bc234b
10 changed files with 293 additions and 36 deletions
--- a/core/agent/agent.go
+++ b/core/agent/agent.go
@@ -525,7 +525,7 @@ func (a *Agent) filterJob(job *types.Job) (ok bool, err error) {
 			if ok {
 				triggeredBy = name
 				xlog.Info("Job triggered by filter", "filter", name)
-			} 
+			}
 		} else if !ok {
 			failedBy = name
 			xlog.Info("Job failed filter", "filter", name)
@@ -560,7 +560,6 @@ func (a *Agent) filterJob(job *types.Job) (ok bool, err error) {
 }

 func (a *Agent) consumeJob(job *types.Job, role string, retries int) {
-
 	if err := job.GetContext().Err(); err != nil {
 		job.Result.Finish(fmt.Errorf("expired"))
 		return
@@ -659,12 +658,9 @@ func (a *Agent) consumeJob(job *types.Job, role string, retries int) {
 		}
 	}

-	//xlog.Debug("Picked action", "agent", a.Character.Name, "action", chosenAction.Definition().Name, "reasoning", reasoning)
 	if chosenAction == nil {
 		// If no action was picked up, the reasoning is the message returned by the assistant
 		// so we can consume it as if it was a reply.
-		//job.Result.SetResult(ActionState{ActionCurrentState{nil, nil, "No action to do, just reply"}, ""})
-		//job.Result.Finish(fmt.Errorf("no action to do"))\
 		xlog.Info("No action to do, just reply", "agent", a.Character.Name, "reasoning", reasoning)

 		if reasoning != "" {
@@ -684,6 +680,23 @@ func (a *Agent) consumeJob(job *types.Job, role string, retries int) {
 			reasoning = msg.Content
 		}

+		var satisfied bool
+		var err error
+		// Evaluate the response
+		satisfied, conv, err = a.handleEvaluation(job, conv, job.GetEvaluationLoop())
+		if err != nil {
+			job.Result.Finish(fmt.Errorf("error evaluating response: %w", err))
+			return
+		}
+
+		if !satisfied {
+			// If not satisfied, continue with the conversation
+			job.ConversationHistory = conv
+			job.IncrementEvaluationLoop()
+			a.consumeJob(job, role, retries)
+			return
+		}
+
 		xlog.Debug("Finish job with reasoning", "reasoning", reasoning, "agent", a.Character.Name, "conversation", fmt.Sprintf("%+v", conv))
 		job.Result.Conversation = conv
 		job.Result.AddFinalizer(func(conv []openai.ChatCompletionMessage) {
@@ -773,8 +786,6 @@ func (a *Agent) consumeJob(job *types.Job, role string, retries int) {
 	conv, err = a.handlePlanning(job.GetContext(), job, chosenAction, actionParams, reasoning, pickTemplate, conv)
 	if err != nil {
 		xlog.Error("error handling planning", "error", err)
-		//job.Result.Conversation = conv
-		//job.Result.SetResponse(msg.Content)
 		a.reply(job, role, append(conv, openai.ChatCompletionMessage{
 			Role:    "assistant",
 			Content: fmt.Sprintf("Error handling planning: %v", err),
@@ -821,9 +832,6 @@ func (a *Agent) consumeJob(job *types.Job, role string, retries int) {
 	if !chosenAction.Definition().Name.Is(action.PlanActionName) {
 		result, err := a.runAction(job, chosenAction, actionParams)
 		if err != nil {
-			//job.Result.Finish(fmt.Errorf("error running action: %w", err))
-			//return
-			// make the LLM aware of the error of running the action instead of stopping the job here
 			result.Result = fmt.Sprintf("Error running tool: %v", err)
 		}

@@ -866,6 +874,22 @@ func (a *Agent) consumeJob(job *types.Job, role string, retries int) {
 		return
 	}

+	// Evaluate the final response
+	var satisfied bool
+	satisfied, conv, err = a.handleEvaluation(job, conv, job.GetEvaluationLoop())
+	if err != nil {
+		job.Result.Finish(fmt.Errorf("error evaluating response: %w", err))
+		return
+	}
+
+	if !satisfied {
+		// If not satisfied, continue with the conversation
+		job.ConversationHistory = conv
+		job.IncrementEvaluationLoop()
+		a.consumeJob(job, role, retries)
+		return
+	}
+
 	a.reply(job, role, conv, actionParams, chosenAction, reasoning)
 }