package agent

import (
	"fmt"

	"github.com/mudler/LocalAGI/core/types"
	"github.com/mudler/LocalAGI/pkg/llm"
	"github.com/mudler/LocalAGI/pkg/xlog"

	"github.com/sashabaranov/go-openai"
	"github.com/sashabaranov/go-openai/jsonschema"
)

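// EvaluationResult is the structured verdict returned by the evaluation
// step: whether the user's goal was satisfied, any remaining gaps, and
// the model's reasoning.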
type EvaluationResult struct {
	Satisfied bool     `json:"satisfied"`
	Gaps      []string `json:"gaps"`
	Reasoning string   `json:"reasoning"`
}

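// GoalExtraction is the structured summary of the user's request: the
// main goal, any explicit constraints, and surrounding context.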
type GoalExtraction struct {
	Goal        string   `json:"goal"`
	Constraints []string `json:"constraints"`
	Context     string   `json:"context"`
}

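// extractGoal asks the model to distill the whole conversation into a
// GoalExtraction, using a JSON schema to force a typed response.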
func (a *Agent) extractGoal(job *types.Job, conv []openai.ChatCompletionMessage) (*GoalExtraction, error) {
	// Create the goal extraction schema
	schema := jsonschema.Definition{
		Type: jsonschema.Object,
		Properties: map[string]jsonschema.Definition{
			"goal": {
				Type:        jsonschema.String,
				Description: "The main goal or request from the user",
			},
			"constraints": {
				Type: jsonschema.Array,
				Items: &jsonschema.Definition{
					Type: jsonschema.String,
				},
				Description: "Any constraints or requirements specified by the user",
			},
			"context": {
				Type:        jsonschema.String,
				Description: "Additional context that might be relevant for understanding the goal",
			},
		},
		Required: []string{"goal", "constraints", "context"},
	}

	// Create the goal extraction prompt
	prompt := `Analyze the conversation and extract the user's main goal, any constraints, and relevant context.
Consider the entire conversation history to understand the complete context and requirements.
Focus on identifying the primary objective and any specific requirements or limitations mentioned.`

	var result GoalExtraction
	// Prepend the system prompt so the model sees the complete
	// conversation history when extracting the goal.
	err := llm.GenerateTypedJSONWithConversation(job.GetContext(), a.client,
		append(
			[]openai.ChatCompletionMessage{
				{
					Role:    "system",
					Content: prompt,
				},
			},
			conv...), a.options.LLMAPI.Model, schema, &result)
	if err != nil {
		return nil, fmt.Errorf("error extracting goal: %w", err)
	}

	return &result, nil
}

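// evaluateJob checks whether the conversation so far satisfies the
// user's goal. It first extracts the goal, then asks the model for a
// structured EvaluationResult against the conversation history. When
// evaluation is disabled it reports the job as satisfied.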
func (a *Agent) evaluateJob(job *types.Job, conv []openai.ChatCompletionMessage) (*EvaluationResult, error) {
	if !a.options.enableEvaluation {
		return &EvaluationResult{Satisfied: true}, nil
	}

	// Extract the goal first
	goal, err := a.extractGoal(job, conv)
	if err != nil {
		return nil, fmt.Errorf("error extracting goal: %w", err)
	}

	// Create the evaluation schema
	schema := jsonschema.Definition{
		Type: jsonschema.Object,
		Properties: map[string]jsonschema.Definition{
			"satisfied": {
				Type: jsonschema.Boolean,
			},
			"gaps": {
				Type: jsonschema.Array,
				Items: &jsonschema.Definition{
					Type: jsonschema.String,
				},
			},
			"reasoning": {
				Type: jsonschema.String,
			},
		},
		Required: []string{"satisfied", "gaps", "reasoning"},
	}

	// Create the evaluation prompt
	prompt := fmt.Sprintf(`Evaluate if the assistant has satisfied the user's request. Consider:
1. The identified goal: %s
2. Constraints and requirements: %v
3. Context: %s
4. The conversation history
5. Any gaps or missing information
6. Whether the response fully addresses the user's needs

Provide a detailed evaluation with specific gaps if any are found.`,
		goal.Goal,
		goal.Constraints,
		goal.Context)

	var result EvaluationResult
	err = llm.GenerateTypedJSONWithConversation(job.GetContext(), a.client,
		append(
			[]openai.ChatCompletionMessage{
				{
					Role:    "system",
					Content: prompt,
				},
			},
			conv...),
		a.options.LLMAPI.Model, schema, &result)
	if err != nil {
		return nil, fmt.Errorf("error generating evaluation: %w", err)
	}

	return &result, nil
}

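// handleEvaluation runs one evaluation pass and reports whether the job
// is done. When gaps are found, they are appended to the conversation as
// a system message so the next loop iteration can address them; the loop
// is bounded by maxEvaluationLoops.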
func (a *Agent) handleEvaluation(job *types.Job, conv []openai.ChatCompletionMessage, currentLoop int) (bool, []openai.ChatCompletionMessage, error) {
	if !a.options.enableEvaluation || currentLoop >= a.options.maxEvaluationLoops {
		return true, conv, nil
	}

	result, err := a.evaluateJob(job, conv)
	if err != nil {
		return false, conv, err
	}

	if result.Satisfied {
		return true, conv, nil
	}

	// If there are gaps, we need to address them
	if len(result.Gaps) > 0 {
		// Add the evaluation result to the conversation
		conv = append(conv, openai.ChatCompletionMessage{
			Role: "system",
			Content: fmt.Sprintf("Evaluation found gaps that need to be addressed:\n%s\nReasoning: %s",
				result.Gaps, result.Reasoning),
		})

		xlog.Debug("Evaluation found gaps, incrementing loop count", "loop", currentLoop+1)
		return false, conv, nil
	}

	return true, conv, nil
}
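
// Usage sketch (illustrative only, not part of this file): a caller is
// expected to drive handleEvaluation in a loop, regenerating a reply
// until the evaluation passes or maxEvaluationLoops is reached. The
// runStep helper below is hypothetical and stands in for whatever
// produces the next assistant message.
//
//	for loop := 0; ; loop++ {
//		done, updated, err := a.handleEvaluation(job, conv, loop)
//		if err != nil || done {
//			break
//		}
//		// Gaps were appended to updated as a system message; let the
//		// model try again with that feedback in context.
//		conv = a.runStep(job, updated) // hypothetical helper
//	}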