feat: improve parameter generation by forcing reasoning (#193)

* feat: improve parameter generation by forcing reasoning

Signed-off-by: mudler <mudler@localai.io>

* Update core/agent/actions.go

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update core/agent/actions.go

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Try to change default models

Signed-off-by: mudler <mudler@localai.io>

---------

Signed-off-by: mudler <mudler@localai.io>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Ettore Di Giacinto
2025-06-01 10:11:04 +02:00
committed by GitHub
parent f0dac5ca22
commit 56b6f7240c
4 changed files with 68 additions and 23 deletions
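In short, the change teaches the agent to make an extra reasoning pass before it fills in tool parameters. Below is a condensed, self-contained sketch of the flow added to core/agent/actions.go; askModel stands in for the agent's askLLM helper, the prompt template is abridged, and the action and parameter names are made up. The actual code is in the diff that follows.

```go
// Condensed sketch of the flow this commit adds to generateParameters in
// core/agent/actions.go. Heavily simplified: askModel stands in for the
// agent's askLLM call, the prompt template is abridged, and the action and
// parameter names are hypothetical.
package main

import "fmt"

// Abridged version of the parameterReasoningPrompt template from the diff.
const reasoningTemplate = `You are tasked with generating the optimal parameters for the action "%s". The action requires the following parameters:
%s
Focus on quality and completeness - just provide the best possible parameter values.`

// askModel is a stand-in for a.askLLM; here it just returns canned text.
func askModel(prompt string) (string, error) {
	return "url should be the page the user mentioned; keep the timeout short", nil
}

func main() {
	action := "browse"                         // hypothetical action name
	properties := "- url: The page to fetch\n" // normally built by formatProperties
	reasoning := "the user asked to open a page"

	// 1. Ask the model to reason about the best values for this action's parameters.
	paramReasoning, err := askModel(fmt.Sprintf(reasoningTemplate, action, properties))
	if err != nil {
		// In the real code a failure here is only logged; generation continues.
		paramReasoning = ""
	}

	// 2. Fold that analysis into the reasoning attached to the tool call.
	enhanced := reasoning
	if paramReasoning != "" {
		enhanced = fmt.Sprintf("%s\n\nParameter Analysis:\n%s", reasoning, paramReasoning)
	}

	// 3. The enhanced reasoning becomes the system message that precedes the
	//    structured (tool-call based) parameter generation.
	fmt.Printf("The agent decided to use the tool %s with the following reasoning: %s\n", action, enhanced)
}
```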

Makefile

@@ -11,7 +11,7 @@ cleanup-tests:
docker compose down
tests: prepare-tests
- LOCALAGI_MCPBOX_URL="http://localhost:9090" LOCALAGI_MODEL="gemma-3-12b-it-qat" LOCALAI_API_URL="http://localhost:8081" LOCALAGI_API_URL="http://localhost:8080" $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --fail-fast -v -r ./...
+ LOCALAGI_MCPBOX_URL="http://localhost:9090" LOCALAGI_MODEL="gemma-3-4b-it-qat" LOCALAI_API_URL="http://localhost:8081" LOCALAGI_API_URL="http://localhost:8080" $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --fail-fast -v -r ./...
run-nokb:
$(MAKE) run KBDISABLEINDEX=true

README.md

@@ -63,7 +63,7 @@ MODEL_NAME=gemma-3-12b-it docker compose up
# NVIDIA GPU setup with custom multimodal and image models
MODEL_NAME=gemma-3-12b-it \
- MULTIMODAL_MODEL=minicpm-v-2_6 \
+ MULTIMODAL_MODEL=moondream2-20250414 \
IMAGE_MODEL=flux.1-dev-ggml \
docker compose -f docker-compose.nvidia.yaml up
```
@@ -126,8 +126,8 @@ LocalAGI supports multiple hardware configurations through Docker Compose profil
- Supports text, multimodal, and image generation models
- Run with: `docker compose -f docker-compose.nvidia.yaml up`
- Default models:
-   - Text: `gemma-3-12b-it-qat`
-   - Multimodal: `minicpm-v-2_6`
+   - Text: `gemma-3-4b-it-qat`
+   - Multimodal: `moondream2-20250414`
- Image: `sd-1.5-ggml`
- Environment variables:
- `MODEL_NAME`: Text model to use
@@ -142,8 +142,8 @@ LocalAGI supports multiple hardware configurations through Docker Compose profil
- Supports text, multimodal, and image generation models
- Run with: `docker compose -f docker-compose.intel.yaml up`
- Default models:
-   - Text: `gemma-3-12b-it-qat`
-   - Multimodal: `minicpm-v-2_6`
+   - Text: `gemma-3-4b-it-qat`
+   - Multimodal: `moondream2-20250414`
- Image: `sd-1.5-ggml`
- Environment variables:
- `MODEL_NAME`: Text model to use
@@ -161,20 +161,20 @@ MODEL_NAME=gemma-3-12b-it docker compose up
# NVIDIA GPU with custom models
MODEL_NAME=gemma-3-12b-it \
- MULTIMODAL_MODEL=minicpm-v-2_6 \
+ MULTIMODAL_MODEL=moondream2-20250414 \
IMAGE_MODEL=flux.1-dev-ggml \
docker compose -f docker-compose.nvidia.yaml up
# Intel GPU with custom models
MODEL_NAME=gemma-3-12b-it \
- MULTIMODAL_MODEL=minicpm-v-2_6 \
+ MULTIMODAL_MODEL=moondream2-20250414 \
IMAGE_MODEL=sd-1.5-ggml \
docker compose -f docker-compose.intel.yaml up
```
If no models are specified, it will use the defaults:
- - Text model: `gemma-3-12b-it-qat`
- - Multimodal model: `minicpm-v-2_6`
+ - Text model: `gemma-3-4b-it-qat`
+ - Multimodal model: `moondream2-20250414`
- Image model: `sd-1.5-ggml`
Good (relatively small) models that have been tested are:

core/agent/actions.go

@@ -5,6 +5,7 @@ import (
"encoding/json"
"fmt"
"os"
"strings"
"github.com/mudler/LocalAGI/core/action"
"github.com/mudler/LocalAGI/core/types"
@@ -12,12 +13,24 @@ import (
"github.com/mudler/LocalAGI/pkg/xlog"
"github.com/sashabaranov/go-openai"
"github.com/sashabaranov/go-openai/jsonschema"
)
+ const parameterReasoningPrompt = `You are tasked with generating the optimal parameters for the action "%s". The action requires the following parameters:
+ %s
+ Your task is to:
+ 1. Generate the best possible values for each required parameter
+ 2. If the parameter requires code, provide complete, working code
+ 3. If the parameter requires text or documentation, provide comprehensive, well-structured content
+ 4. Ensure all parameters are complete and ready to be used
+ Focus on quality and completeness. Do not explain your reasoning or analyze the action's purpose - just provide the best possible parameter values.`
type decisionResult struct {
actionParams types.ActionParams
message string
- actioName string
+ actionName string
}
// decision forces the agent to take one of the available actions
@@ -131,7 +144,7 @@ func (a *Agent) decision(
a.observer.Update(*obs)
}
- return &decisionResult{actionParams: params, actioName: msg.ToolCalls[0].Function.Name, message: msg.Content}, nil
+ return &decisionResult{actionParams: params, actionName: msg.ToolCalls[0].Function.Name, message: msg.Content}, nil
}
return nil, fmt.Errorf("failed to make a decision after %d attempts: %w", maxRetries, lastErr)
@@ -248,9 +261,32 @@ func (a *Agent) generateParameters(job *types.Job, pickTemplate string, act type
cc := conversation
if a.options.forceReasoning {
+ // First, get the LLM to reason about optimal parameter usage
+ parameterReasoningPrompt := fmt.Sprintf(parameterReasoningPrompt,
+ act.Definition().Name,
+ formatProperties(act.Definition().Properties))
+ // Get initial reasoning about parameters using askLLM
+ paramReasoningMsg, err := a.askLLM(job.GetContext(),
+ append(conversation, openai.ChatCompletionMessage{
+ Role: "system",
+ Content: parameterReasoningPrompt,
+ }),
+ maxAttempts,
+ )
+ if err != nil {
+ xlog.Warn("Failed to get parameter reasoning", "error", err)
+ }
+ // Combine original reasoning with parameter-specific reasoning
+ enhancedReasoning := reasoning
+ if paramReasoningMsg.Content != "" {
+ enhancedReasoning = fmt.Sprintf("%s\n\nParameter Analysis:\n%s", reasoning, paramReasoningMsg.Content)
+ }
cc = append(conversation, openai.ChatCompletionMessage{
Role: "system",
Content: fmt.Sprintf("The agent decided to use the tool %s with the following reasoning: %s", act.Definition().Name, reasoning),
Content: fmt.Sprintf("The agent decided to use the tool %s with the following reasoning: %s", act.Definition().Name, enhancedReasoning),
})
}
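One detail worth noting in the block above: a failed askLLM call is only logged, so parameter generation still proceeds with the original reasoning. A minimal sketch of that fallback behaviour (hypothetical test, not part of the commit; assumes the usual fmt and testing imports):

```go
// Hypothetical test, not in the diff: when the parameter-reasoning call fails
// (or returns empty content), the enhanced reasoning falls back to the original.
func TestParameterReasoningFallback(t *testing.T) {
	reasoning := "original reasoning"
	paramReasoning := "" // askLLM failed; only a warning was logged

	enhanced := reasoning
	if paramReasoning != "" {
		enhanced = fmt.Sprintf("%s\n\nParameter Analysis:\n%s", reasoning, paramReasoning)
	}

	if enhanced != reasoning {
		t.Fatalf("expected fallback to the original reasoning, got %q", enhanced)
	}
}
```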
@@ -273,6 +309,15 @@ func (a *Agent) generateParameters(job *types.Job, pickTemplate string, act type
return nil, fmt.Errorf("failed to generate parameters after %d attempts: %w", maxAttempts, attemptErr)
}
+ // Helper function to format properties for the prompt
+ func formatProperties(props map[string]jsonschema.Definition) string {
+ var result strings.Builder
+ for name, prop := range props {
+ result.WriteString(fmt.Sprintf("- %s: %s\n", name, prop.Description))
+ }
+ return result.String()
+ }
func (a *Agent) handlePlanning(ctx context.Context, job *types.Job, chosenAction types.Action, actionParams types.ActionParams, reasoning string, pickTemplate string, conv Messages) (Messages, error) {
// Planning: run all the actions in sequence
if !chosenAction.Definition().Name.Is(action.PlanActionName) {
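For reference, this is roughly what the new formatProperties helper produces for a hypothetical action schema (illustrative only; assumes the fmt and go-openai jsonschema imports already present in this file):

```go
// Illustrative only, not part of the commit: formatProperties output for a
// made-up two-parameter action definition.
func exampleFormatProperties() {
	props := map[string]jsonschema.Definition{
		"url":     {Type: jsonschema.String, Description: "The page to fetch"},
		"timeout": {Type: jsonschema.Integer, Description: "Request timeout in seconds"},
	}
	fmt.Print(formatProperties(props))
	// Prints (map iteration order is not guaranteed):
	// - url: The page to fetch
	// - timeout: Request timeout in seconds
}
```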
@@ -455,12 +500,12 @@ func (a *Agent) pickAction(job *types.Job, templ string, messages []openai.ChatC
return nil, nil, "", err
}
xlog.Debug(fmt.Sprintf("thought action Name: %v", thought.actioName))
xlog.Debug(fmt.Sprintf("thought message: %v", thought.message))
xlog.Debug("thought action Name", "actionName", thought.actionName)
xlog.Debug("thought message", "message", thought.message)
// Find the action
- chosenAction := a.availableActions().Find(thought.actioName)
- if chosenAction == nil || thought.actioName == "" {
+ chosenAction := a.availableActions().Find(thought.actionName)
+ if chosenAction == nil || thought.actionName == "" {
xlog.Debug("no answer")
// LLM replied with an answer?
@@ -496,8 +541,8 @@ func (a *Agent) pickAction(job *types.Job, templ string, messages []openai.ChatC
if err != nil {
return nil, nil, "", err
}
- if thought.actioName != "" && thought.actioName != reasoningAction.Definition().Name.String() {
- return nil, nil, "", fmt.Errorf("Expected reasoning action not: %s", thought.actioName)
+ if thought.actionName != "" && thought.actionName != reasoningAction.Definition().Name.String() {
+ return nil, nil, "", fmt.Errorf("expected reasoning action %s, got %s", reasoningAction.Definition().Name.String(), thought.actionName)
}
originalReasoning := ""

Docker Compose file

@@ -7,8 +7,8 @@ services:
# Image list (dockerhub): https://hub.docker.com/r/localai/localai
image: localai/localai:master
command:
- - ${MODEL_NAME:-gemma-3-12b-it-qat}
- - ${MULTIMODAL_MODEL:-minicpm-v-2_6}
+ - ${MODEL_NAME:-gemma-3-4b-it-qat}
+ - ${MULTIMODAL_MODEL:-moondream2-20250414}
- ${IMAGE_MODEL:-sd-1.5-ggml}
- granite-embedding-107m-multilingual
healthcheck:
@@ -105,8 +105,8 @@ services:
- 8080:3000
#image: quay.io/mudler/localagi:master
environment:
- - LOCALAGI_MODEL=${MODEL_NAME:-gemma-3-12b-it-qat}
- - LOCALAGI_MULTIMODAL_MODEL=${MULTIMODAL_MODEL:-minicpm-v-2_6}
+ - LOCALAGI_MODEL=${MODEL_NAME:-gemma-3-4b-it-qat}
+ - LOCALAGI_MULTIMODAL_MODEL=${MULTIMODAL_MODEL:-moondream2-20250414}
- LOCALAGI_IMAGE_MODEL=${IMAGE_MODEL:-sd-1.5-ggml}
- LOCALAGI_LLM_API_URL=http://localai:8080
#- LOCALAGI_LLM_API_KEY=sk-1234567890