feat(evaluation): add deep evaluation mechanism (#145)

* feat(evaluation): add deep evaluation mechanism

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* consider whole conversation when evaluating

Signed-off-by: mudler <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: mudler <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2025-05-11 18:31:04 +02:00
committed by GitHub
parent 289edb67a6
commit e431bc234b
10 changed files with 293 additions and 36 deletions

View File

@@ -525,7 +525,7 @@ func (a *Agent) filterJob(job *types.Job) (ok bool, err error) {
if ok {
triggeredBy = name
xlog.Info("Job triggered by filter", "filter", name)
}
}
} else if !ok {
failedBy = name
xlog.Info("Job failed filter", "filter", name)
@@ -560,7 +560,6 @@ func (a *Agent) filterJob(job *types.Job) (ok bool, err error) {
}
func (a *Agent) consumeJob(job *types.Job, role string, retries int) {
if err := job.GetContext().Err(); err != nil {
job.Result.Finish(fmt.Errorf("expired"))
return
@@ -659,12 +658,9 @@ func (a *Agent) consumeJob(job *types.Job, role string, retries int) {
}
}
//xlog.Debug("Picked action", "agent", a.Character.Name, "action", chosenAction.Definition().Name, "reasoning", reasoning)
if chosenAction == nil {
// If no action was picked up, the reasoning is the message returned by the assistant
// so we can consume it as if it was a reply.
//job.Result.SetResult(ActionState{ActionCurrentState{nil, nil, "No action to do, just reply"}, ""})
//job.Result.Finish(fmt.Errorf("no action to do"))\
xlog.Info("No action to do, just reply", "agent", a.Character.Name, "reasoning", reasoning)
if reasoning != "" {
@@ -684,6 +680,23 @@ func (a *Agent) consumeJob(job *types.Job, role string, retries int) {
reasoning = msg.Content
}
var satisfied bool
var err error
// Evaluate the response
satisfied, conv, err = a.handleEvaluation(job, conv, job.GetEvaluationLoop())
if err != nil {
job.Result.Finish(fmt.Errorf("error evaluating response: %w", err))
return
}
if !satisfied {
// If not satisfied, continue with the conversation
job.ConversationHistory = conv
job.IncrementEvaluationLoop()
a.consumeJob(job, role, retries)
return
}
xlog.Debug("Finish job with reasoning", "reasoning", reasoning, "agent", a.Character.Name, "conversation", fmt.Sprintf("%+v", conv))
job.Result.Conversation = conv
job.Result.AddFinalizer(func(conv []openai.ChatCompletionMessage) {
@@ -773,8 +786,6 @@ func (a *Agent) consumeJob(job *types.Job, role string, retries int) {
conv, err = a.handlePlanning(job.GetContext(), job, chosenAction, actionParams, reasoning, pickTemplate, conv)
if err != nil {
xlog.Error("error handling planning", "error", err)
//job.Result.Conversation = conv
//job.Result.SetResponse(msg.Content)
a.reply(job, role, append(conv, openai.ChatCompletionMessage{
Role: "assistant",
Content: fmt.Sprintf("Error handling planning: %v", err),
@@ -821,9 +832,6 @@ func (a *Agent) consumeJob(job *types.Job, role string, retries int) {
if !chosenAction.Definition().Name.Is(action.PlanActionName) {
result, err := a.runAction(job, chosenAction, actionParams)
if err != nil {
//job.Result.Finish(fmt.Errorf("error running action: %w", err))
//return
// make the LLM aware of the error of running the action instead of stopping the job here
result.Result = fmt.Sprintf("Error running tool: %v", err)
}
@@ -866,6 +874,22 @@ func (a *Agent) consumeJob(job *types.Job, role string, retries int) {
return
}
// Evaluate the final response
var satisfied bool
satisfied, conv, err = a.handleEvaluation(job, conv, job.GetEvaluationLoop())
if err != nil {
job.Result.Finish(fmt.Errorf("error evaluating response: %w", err))
return
}
if !satisfied {
// If not satisfied, continue with the conversation
job.ConversationHistory = conv
job.IncrementEvaluationLoop()
a.consumeJob(job, role, retries)
return
}
a.reply(job, role, conv, actionParams, chosenAction, reasoning)
}