diff --git a/core/agent/agent.go b/core/agent/agent.go
index 658fec9..016d9bf 100644
--- a/core/agent/agent.go
+++ b/core/agent/agent.go
@@ -525,7 +525,7 @@ func (a *Agent) filterJob(job *types.Job) (ok bool, err error) {
if ok {
triggeredBy = name
xlog.Info("Job triggered by filter", "filter", name)
- }
+ }
} else if !ok {
failedBy = name
xlog.Info("Job failed filter", "filter", name)
@@ -560,7 +560,6 @@ func (a *Agent) filterJob(job *types.Job) (ok bool, err error) {
}
func (a *Agent) consumeJob(job *types.Job, role string, retries int) {
-
if err := job.GetContext().Err(); err != nil {
job.Result.Finish(fmt.Errorf("expired"))
return
@@ -659,12 +658,9 @@ func (a *Agent) consumeJob(job *types.Job, role string, retries int) {
}
}
- //xlog.Debug("Picked action", "agent", a.Character.Name, "action", chosenAction.Definition().Name, "reasoning", reasoning)
if chosenAction == nil {
// If no action was picked up, the reasoning is the message returned by the assistant
// so we can consume it as if it was a reply.
- //job.Result.SetResult(ActionState{ActionCurrentState{nil, nil, "No action to do, just reply"}, ""})
- //job.Result.Finish(fmt.Errorf("no action to do"))\
xlog.Info("No action to do, just reply", "agent", a.Character.Name, "reasoning", reasoning)
if reasoning != "" {
@@ -684,6 +680,23 @@ func (a *Agent) consumeJob(job *types.Job, role string, retries int) {
reasoning = msg.Content
}
+ var satisfied bool
+ var err error
+ // Evaluate the response
+ satisfied, conv, err = a.handleEvaluation(job, conv, job.GetEvaluationLoop())
+ if err != nil {
+ job.Result.Finish(fmt.Errorf("error evaluating response: %w", err))
+ return
+ }
+
+ if !satisfied {
+ // If not satisfied, continue with the conversation
+ job.ConversationHistory = conv
+ job.IncrementEvaluationLoop()
+ a.consumeJob(job, role, retries)
+ return
+ }
+
xlog.Debug("Finish job with reasoning", "reasoning", reasoning, "agent", a.Character.Name, "conversation", fmt.Sprintf("%+v", conv))
job.Result.Conversation = conv
job.Result.AddFinalizer(func(conv []openai.ChatCompletionMessage) {
@@ -773,8 +786,6 @@ func (a *Agent) consumeJob(job *types.Job, role string, retries int) {
conv, err = a.handlePlanning(job.GetContext(), job, chosenAction, actionParams, reasoning, pickTemplate, conv)
if err != nil {
xlog.Error("error handling planning", "error", err)
- //job.Result.Conversation = conv
- //job.Result.SetResponse(msg.Content)
a.reply(job, role, append(conv, openai.ChatCompletionMessage{
Role: "assistant",
Content: fmt.Sprintf("Error handling planning: %v", err),
@@ -821,9 +832,6 @@ func (a *Agent) consumeJob(job *types.Job, role string, retries int) {
if !chosenAction.Definition().Name.Is(action.PlanActionName) {
result, err := a.runAction(job, chosenAction, actionParams)
if err != nil {
- //job.Result.Finish(fmt.Errorf("error running action: %w", err))
- //return
- // make the LLM aware of the error of running the action instead of stopping the job here
result.Result = fmt.Sprintf("Error running tool: %v", err)
}
@@ -866,6 +874,22 @@ func (a *Agent) consumeJob(job *types.Job, role string, retries int) {
return
}
+ // Evaluate the final response
+ var satisfied bool
+ satisfied, conv, err = a.handleEvaluation(job, conv, job.GetEvaluationLoop())
+ if err != nil {
+ job.Result.Finish(fmt.Errorf("error evaluating response: %w", err))
+ return
+ }
+
+ if !satisfied {
+ // If not satisfied, continue with the conversation
+ job.ConversationHistory = conv
+ job.IncrementEvaluationLoop()
+ a.consumeJob(job, role, retries)
+ return
+ }
+
a.reply(job, role, conv, actionParams, chosenAction, reasoning)
}
diff --git a/core/agent/evaluation.go b/core/agent/evaluation.go
new file mode 100644
index 0000000..7373f91
--- /dev/null
+++ b/core/agent/evaluation.go
@@ -0,0 +1,174 @@
+package agent
+
+import (
+	"fmt"
+
+	"github.com/mudler/LocalAGI/core/types"
+	"github.com/mudler/LocalAGI/pkg/llm"
+	"github.com/mudler/LocalAGI/pkg/xlog"
+	"github.com/sashabaranov/go-openai"
+	"github.com/sashabaranov/go-openai/jsonschema"
+)
+
+// EvaluationResult is the structured verdict produced by the evaluator:
+// whether the reply satisfied the request, which gaps remain, and why.
+type EvaluationResult struct {
+	Satisfied bool     `json:"satisfied"`
+	Gaps      []string `json:"gaps"`
+	Reasoning string   `json:"reasoning"`
+}
+
+// GoalExtraction captures what the user asked for: the main goal, any
+// explicit constraints, and relevant surrounding context.
+type GoalExtraction struct {
+	Goal        string   `json:"goal"`
+	Constraints []string `json:"constraints"`
+	Context     string   `json:"context"`
+}
+
+// extractGoal asks the LLM to distill the user's main goal, explicit
+// constraints and relevant context from the conversation so the evaluator
+// can judge the assistant's answer against them.
+func (a *Agent) extractGoal(job *types.Job, conv []openai.ChatCompletionMessage) (*GoalExtraction, error) {
+	// Schema the model must fill in; all fields are required.
+	schema := jsonschema.Definition{
+		Type: jsonschema.Object,
+		Properties: map[string]jsonschema.Definition{
+			"goal": {
+				Type:        jsonschema.String,
+				Description: "The main goal or request from the user",
+			},
+			"constraints": {
+				Type: jsonschema.Array,
+				Items: &jsonschema.Definition{
+					Type: jsonschema.String,
+				},
+				Description: "Any constraints or requirements specified by the user",
+			},
+			"context": {
+				Type:        jsonschema.String,
+				Description: "Additional context that might be relevant for understanding the goal",
+			},
+		},
+		Required: []string{"goal", "constraints", "context"},
+	}
+
+	prompt := `Analyze the conversation and extract the user's main goal, any constraints, and relevant context.
+Consider the entire conversation history to understand the complete context and requirements.
+Focus on identifying the primary objective and any specific requirements or limitations mentioned.`
+
+	// Prepend the instructions as a system message and replay the whole
+	// conversation so the model sees the full context.
+	var result GoalExtraction
+	err := llm.GenerateTypedJSONWithConversation(job.GetContext(), a.client,
+		append(
+			[]openai.ChatCompletionMessage{
+				{
+					Role:    "system",
+					Content: prompt,
+				},
+			},
+			conv...), a.options.LLMAPI.Model, schema, &result)
+	if err != nil {
+		return nil, fmt.Errorf("error extracting goal: %w", err)
+	}
+
+	return &result, nil
+}
+
+// evaluateJob extracts the user's goal and then asks the LLM whether the
+// conversation satisfies it. When evaluation is disabled it short-circuits
+// to a satisfied verdict.
+func (a *Agent) evaluateJob(job *types.Job, conv []openai.ChatCompletionMessage) (*EvaluationResult, error) {
+	if !a.options.enableEvaluation {
+		return &EvaluationResult{Satisfied: true}, nil
+	}
+
+	// Identify what we are evaluating against.
+	goal, err := a.extractGoal(job, conv)
+	if err != nil {
+		return nil, fmt.Errorf("error extracting goal: %w", err)
+	}
+
+	// Schema for the verdict; all fields are required.
+	schema := jsonschema.Definition{
+		Type: jsonschema.Object,
+		Properties: map[string]jsonschema.Definition{
+			"satisfied": {
+				Type: jsonschema.Boolean,
+			},
+			"gaps": {
+				Type: jsonschema.Array,
+				Items: &jsonschema.Definition{
+					Type: jsonschema.String,
+				},
+			},
+			"reasoning": {
+				Type: jsonschema.String,
+			},
+		},
+		Required: []string{"satisfied", "gaps", "reasoning"},
+	}
+
+	prompt := fmt.Sprintf(`Evaluate if the assistant has satisfied the user's request. Consider:
+1. The identified goal: %s
+2. Constraints and requirements: %v
+3. Context: %s
+4. The conversation history
+5. Any gaps or missing information
+6. Whether the response fully addresses the user's needs
+
+Provide a detailed evaluation with specific gaps if any are found.`,
+		goal.Goal,
+		goal.Constraints,
+		goal.Context)
+
+	var result EvaluationResult
+	err = llm.GenerateTypedJSONWithConversation(job.GetContext(), a.client,
+		append(
+			[]openai.ChatCompletionMessage{
+				{
+					Role:    "system",
+					Content: prompt,
+				},
+			},
+			conv...),
+		a.options.LLMAPI.Model, schema, &result)
+	if err != nil {
+		return nil, fmt.Errorf("error generating evaluation: %w", err)
+	}
+
+	return &result, nil
+}
+
+// handleEvaluation runs one evaluation pass over the conversation. It
+// returns satisfied=true when evaluation is disabled, the loop budget is
+// exhausted, or the evaluator approved the response. Otherwise it appends
+// the evaluator's findings to the conversation and returns satisfied=false
+// so the caller can retry with that feedback.
+func (a *Agent) handleEvaluation(job *types.Job, conv []openai.ChatCompletionMessage, currentLoop int) (bool, []openai.ChatCompletionMessage, error) {
+	if !a.options.enableEvaluation || currentLoop >= a.options.maxEvaluationLoops {
+		return true, conv, nil
+	}
+
+	result, err := a.evaluateJob(job, conv)
+	if err != nil {
+		return false, conv, err
+	}
+
+	if result.Satisfied {
+		return true, conv, nil
+	}
+
+	// Not satisfied: feed the findings back into the conversation so the
+	// next pass can address them. An unsatisfied verdict is honored even
+	// when the gap list comes back empty.
+	conv = append(conv, openai.ChatCompletionMessage{
+		Role: "system",
+		Content: fmt.Sprintf("Evaluation found gaps that need to be addressed:\n%s\nReasoning: %s",
+			result.Gaps, result.Reasoning),
+	})
+
+	xlog.Debug("Evaluation found gaps, incrementing loop count", "loop", currentLoop+1)
+	return false, conv, nil
+}
diff --git a/core/agent/identity.go b/core/agent/identity.go
index a474d67..60ff14f 100644
--- a/core/agent/identity.go
+++ b/core/agent/identity.go
@@ -12,7 +12,7 @@ func (a *Agent) generateIdentity(guidance string) error {
guidance = "Generate a random character for roleplaying."
}
- err := llm.GenerateTypedJSON(a.context.Context, a.client, "Generate a character as JSON data. "+guidance, a.options.LLMAPI.Model, a.options.character.ToJSONSchema(), &a.options.character)
+ err := llm.GenerateTypedJSONWithGuidance(a.context.Context, a.client, "Generate a character as JSON data. "+guidance, a.options.LLMAPI.Model, a.options.character.ToJSONSchema(), &a.options.character)
//err := llm.GenerateJSONFromStruct(a.context.Context, a.client, guidance, a.options.LLMAPI.Model, &a.options.character)
a.Character = a.options.character
if err != nil {
diff --git a/core/agent/options.go b/core/agent/options.go
index 4943dad..c7f4514 100644
--- a/core/agent/options.go
+++ b/core/agent/options.go
@@ -42,6 +42,10 @@ type options struct {
kbResults int
ragdb RAGDB
+ // Evaluation settings
+ maxEvaluationLoops int
+ enableEvaluation bool
+
prompts []DynamicPrompt
systemPrompt string
@@ -68,9 +72,11 @@ func (o *options) SeparatedMultimodalModel() bool {
func defaultOptions() *options {
return &options{
- parallelJobs: 1,
- periodicRuns: 15 * time.Minute,
+ parallelJobs: 1,
+ periodicRuns: 15 * time.Minute,
loopDetectionSteps: 10,
+ maxEvaluationLoops: 2,
+ enableEvaluation: false,
LLMAPI: llmOptions{
APIURL: "http://localhost:8080",
Model: "gpt-4",
@@ -392,3 +398,20 @@
 	o.stripThinkingTags = true
 	return nil
 }
+
+// WithMaxEvaluationLoops sets the maximum number of evaluation passes an
+// agent may perform on a single job before accepting the response as-is.
+func WithMaxEvaluationLoops(loops int) Option {
+	return func(o *options) error {
+		o.maxEvaluationLoops = loops
+		return nil
+	}
+}
+
+// EnableEvaluation turns on automatic evaluation of agent responses.
+func EnableEvaluation() Option {
+	return func(o *options) error {
+		o.enableEvaluation = true
+		return nil
+	}
+}
diff --git a/core/state/config.go b/core/state/config.go
index d807ef7..0898975 100644
--- a/core/state/config.go
+++ b/core/state/config.go
@@ -74,6 +74,8 @@ type AgentConfig struct {
SummaryLongTermMemory bool `json:"summary_long_term_memory" form:"summary_long_term_memory"`
ParallelJobs int `json:"parallel_jobs" form:"parallel_jobs"`
StripThinkingTags bool `json:"strip_thinking_tags" form:"strip_thinking_tags"`
+ EnableEvaluation bool `json:"enable_evaluation" form:"enable_evaluation"`
+ MaxEvaluationLoops int `json:"max_evaluation_loops" form:"max_evaluation_loops"`
}
type AgentConfigMeta struct {
@@ -309,6 +311,24 @@ func NewAgentConfigMeta(
HelpText: "Remove content between and tags from agent responses",
Tags: config.Tags{Section: "ModelSettings"},
},
+ {
+ Name: "enable_evaluation",
+ Label: "Enable Evaluation",
+ Type: "checkbox",
+ DefaultValue: false,
+ HelpText: "Enable automatic evaluation of agent responses to ensure they meet user requirements",
+ Tags: config.Tags{Section: "AdvancedSettings"},
+ },
+ {
+ Name: "max_evaluation_loops",
+ Label: "Max Evaluation Loops",
+ Type: "number",
+ DefaultValue: 2,
+ Min: 1,
+ Step: 1,
+ HelpText: "Maximum number of evaluation loops to perform when addressing gaps in responses",
+ Tags: config.Tags{Section: "AdvancedSettings"},
+ },
},
MCPServers: []config.Field{
{
diff --git a/core/state/pool.go b/core/state/pool.go
index 5cbac14..1ca8415 100644
--- a/core/state/pool.go
+++ b/core/state/pool.go
@@ -247,7 +247,7 @@ func createAgentAvatar(APIURL, APIKey, model, imageModel, avatarDir string, agen
ImagePrompt string `json:"image_prompt"`
}
- err := llm.GenerateTypedJSON(
+ err := llm.GenerateTypedJSONWithGuidance(
context.Background(),
llm.NewClient(APIKey, APIURL, "10m"),
"Generate a prompt that I can use to create a random avatar for the bot '"+agent.Name+"', the description of the bot is: "+agent.Description,
@@ -561,6 +561,13 @@ func (a *AgentPool) startAgentWithConfig(name string, config *AgentConfig, obs O
opts = append(opts, WithParallelJobs(config.ParallelJobs))
}
+ if config.EnableEvaluation {
+ opts = append(opts, EnableEvaluation())
+ if config.MaxEvaluationLoops > 0 {
+ opts = append(opts, WithMaxEvaluationLoops(config.MaxEvaluationLoops))
+ }
+ }
+
xlog.Info("Starting agent", "name", name, "config", config)
agent, err := New(opts...)
diff --git a/core/types/job.go b/core/types/job.go
index 7a48ee1..c701c05 100644
--- a/core/types/job.go
+++ b/core/types/job.go
@@ -162,23 +162,23 @@ func newUUID() string {
// To wait for a Job result, use JobResult.WaitResult()
func NewJob(opts ...JobOption) *Job {
j := &Job{
- Result: NewJobResult(),
- UUID: newUUID(),
- }
- for _, o := range opts {
- o(j)
+ Result: NewJobResult(),
+ UUID: uuid.New().String(),
+ Metadata: make(map[string]interface{}),
+ context: context.Background(),
+ ConversationHistory: []openai.ChatCompletionMessage{},
}
- var ctx context.Context
- if j.context == nil {
- ctx = context.Background()
- } else {
- ctx = j.context
+ for _, opt := range opts {
+ opt(j)
}
- context, cancel := context.WithCancel(ctx)
- j.context = context
+ // Store the original request if it exists in the conversation history
+
+ ctx, cancel := context.WithCancel(j.context)
+ j.context = ctx
j.cancel = cancel
+
return j
}
@@ -207,3 +207,22 @@
 		j.Obs = obs
 	}
 }
+
+// GetEvaluationLoop returns the number of evaluation passes already
+// performed for this job, defaulting to 0 when none are recorded.
+func (j *Job) GetEvaluationLoop() int {
+	if j.Metadata == nil {
+		j.Metadata = make(map[string]interface{})
+	}
+	loop, ok := j.Metadata["evaluation_loop"].(int)
+	if !ok {
+		return 0
+	}
+	return loop
+}
+
+// IncrementEvaluationLoop advances the evaluation loop counter by one.
+// GetEvaluationLoop guarantees Metadata is initialized before the write.
+func (j *Job) IncrementEvaluationLoop() {
+	j.Metadata["evaluation_loop"] = j.GetEvaluationLoop() + 1
+}
diff --git a/pkg/llm/json.go b/pkg/llm/json.go
index 5386fe1..c4f48d1 100644
--- a/pkg/llm/json.go
+++ b/pkg/llm/json.go
@@ -10,16 +10,20 @@ import (
"github.com/sashabaranov/go-openai/jsonschema"
)
-func GenerateTypedJSON(ctx context.Context, client *openai.Client, guidance, model string, i jsonschema.Definition, dst any) error {
+func GenerateTypedJSONWithGuidance(ctx context.Context, client *openai.Client, guidance, model string, i jsonschema.Definition, dst any) error {
+ return GenerateTypedJSONWithConversation(ctx, client, []openai.ChatCompletionMessage{
+ {
+ Role: "user",
+ Content: guidance,
+ },
+ }, model, i, dst)
+}
+
+func GenerateTypedJSONWithConversation(ctx context.Context, client *openai.Client, conv []openai.ChatCompletionMessage, model string, i jsonschema.Definition, dst any) error {
toolName := "json"
decision := openai.ChatCompletionRequest{
- Model: model,
- Messages: []openai.ChatCompletionMessage{
- {
- Role: "user",
- Content: guidance,
- },
- },
+ Model: model,
+ Messages: conv,
Tools: []openai.Tool{
{
diff --git a/services/filters/classifier.go b/services/filters/classifier.go
index d85aa4f..e517a7c 100644
--- a/services/filters/classifier.go
+++ b/services/filters/classifier.go
@@ -78,7 +78,7 @@ func (f *ClassifierFilter) Apply(job *types.Job) (bool, error) {
var result struct {
Asserted bool `json:"answer"`
}
- err := llm.GenerateTypedJSON(job.GetContext(), f.client, guidance, f.model, jsonschema.Definition{
+ err := llm.GenerateTypedJSONWithGuidance(job.GetContext(), f.client, guidance, f.model, jsonschema.Definition{
Type: jsonschema.Object,
Properties: map[string]jsonschema.Definition{
"answer": {
diff --git a/webui/app.go b/webui/app.go
index acf5291..ff4b29e 100644
--- a/webui/app.go
+++ b/webui/app.go
@@ -576,7 +576,7 @@ func (a *App) GenerateGroupProfiles(pool *state.AgentPool) func(c *fiber.Ctx) er
xlog.Debug("Generating group", "description", request.Descript)
client := llm.NewClient(a.config.LLMAPIKey, a.config.LLMAPIURL, "10m")
- err := llm.GenerateTypedJSON(c.Context(), client, request.Descript, a.config.LLMModel, jsonschema.Definition{
+ err := llm.GenerateTypedJSONWithGuidance(c.Context(), client, request.Descript, a.config.LLMModel, jsonschema.Definition{
Type: jsonschema.Object,
Properties: map[string]jsonschema.Definition{
"agents": {