feat: improve parameter generation by forcing reasoning (#193)
* feat: improve parameter generation by forcing reasoning

Signed-off-by: mudler <mudler@localai.io>

* Update core/agent/actions.go

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update core/agent/actions.go

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Try to change default models

Signed-off-by: mudler <mudler@localai.io>

---------

Signed-off-by: mudler <mudler@localai.io>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
commit 56b6f7240c (committed via GitHub)
parent f0dac5ca22
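At a high level, the change turns parameter generation into a two-step process when `forceReasoning` is enabled: the model is first asked to reason about the best values for the chosen action's parameters, and that analysis is then folded into the system message that precedes the actual tool-call generation. Below is a minimal, self-contained sketch of the flow — the `askLLM` stub and the `search` action are illustrative stand-ins, not the repository's API:

```go
package main

import (
	"fmt"
	"strings"
)

// askLLM stands in for the agent's real LLM call; it returns a canned analysis.
func askLLM(prompt string) (string, error) {
	return `query should be a short keyword phrase, e.g. "localai gpu setup"`, nil
}

// formatProperties mirrors the helper added in this commit: one bullet per parameter.
func formatProperties(props map[string]string) string {
	var b strings.Builder
	for name, desc := range props {
		b.WriteString(fmt.Sprintf("- %s: %s\n", name, desc))
	}
	return b.String()
}

func main() {
	actionName := "search" // hypothetical action
	props := map[string]string{"query": "the search query to run"}
	reasoning := "the user asked how to set up LocalAI on a GPU"

	// Step 1: ask the model to reason about optimal parameter values.
	prompt := fmt.Sprintf(
		"You are tasked with generating the optimal parameters for the action %q. The action requires the following parameters:\n%s",
		actionName, formatProperties(props))
	analysis, err := askLLM(prompt)
	if err != nil {
		// The real code only logs a warning here and falls back to the original reasoning.
		analysis = ""
	}

	// Step 2: fold the analysis into the reasoning attached to the tool call.
	if analysis != "" {
		reasoning = fmt.Sprintf("%s\n\nParameter Analysis:\n%s", reasoning, analysis)
	}
	fmt.Printf("The agent decided to use the tool %s with the following reasoning: %s\n", actionName, reasoning)
}
```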
diff --git a/Makefile b/Makefile (2 changed lines)
@@ -11,7 +11,7 @@ cleanup-tests:
 	docker compose down
 
 tests: prepare-tests
-	LOCALAGI_MCPBOX_URL="http://localhost:9090" LOCALAGI_MODEL="gemma-3-12b-it-qat" LOCALAI_API_URL="http://localhost:8081" LOCALAGI_API_URL="http://localhost:8080" $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --fail-fast -v -r ./...
+	LOCALAGI_MCPBOX_URL="http://localhost:9090" LOCALAGI_MODEL="gemma-3-4b-it-qat" LOCALAI_API_URL="http://localhost:8081" LOCALAGI_API_URL="http://localhost:8080" $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --fail-fast -v -r ./...
 
 run-nokb:
 	$(MAKE) run KBDISABLEINDEX=true
diff --git a/README.md b/README.md (18 changed lines)
@@ -63,7 +63,7 @@ MODEL_NAME=gemma-3-12b-it docker compose up
 
 # NVIDIA GPU setup with custom multimodal and image models
 MODEL_NAME=gemma-3-12b-it \
-MULTIMODAL_MODEL=minicpm-v-2_6 \
+MULTIMODAL_MODEL=moondream2-20250414 \
 IMAGE_MODEL=flux.1-dev-ggml \
 docker compose -f docker-compose.nvidia.yaml up
 ```
@@ -126,8 +126,8 @@ LocalAGI supports multiple hardware configurations through Docker Compose profil
 - Supports text, multimodal, and image generation models
 - Run with: `docker compose -f docker-compose.nvidia.yaml up`
 - Default models:
-  - Text: `gemma-3-12b-it-qat`
-  - Multimodal: `minicpm-v-2_6`
+  - Text: `gemma-3-4b-it-qat`
+  - Multimodal: `moondream2-20250414`
   - Image: `sd-1.5-ggml`
 - Environment variables:
   - `MODEL_NAME`: Text model to use
@@ -142,8 +142,8 @@ LocalAGI supports multiple hardware configurations through Docker Compose profil
 - Supports text, multimodal, and image generation models
 - Run with: `docker compose -f docker-compose.intel.yaml up`
 - Default models:
-  - Text: `gemma-3-12b-it-qat`
-  - Multimodal: `minicpm-v-2_6`
+  - Text: `gemma-3-4b-it-qat`
+  - Multimodal: `moondream2-20250414`
   - Image: `sd-1.5-ggml`
 - Environment variables:
   - `MODEL_NAME`: Text model to use
@@ -161,20 +161,20 @@ MODEL_NAME=gemma-3-12b-it docker compose up
 
 # NVIDIA GPU with custom models
 MODEL_NAME=gemma-3-12b-it \
-MULTIMODAL_MODEL=minicpm-v-2_6 \
+MULTIMODAL_MODEL=moondream2-20250414 \
 IMAGE_MODEL=flux.1-dev-ggml \
 docker compose -f docker-compose.nvidia.yaml up
 
 # Intel GPU with custom models
 MODEL_NAME=gemma-3-12b-it \
-MULTIMODAL_MODEL=minicpm-v-2_6 \
+MULTIMODAL_MODEL=moondream2-20250414 \
 IMAGE_MODEL=sd-1.5-ggml \
 docker compose -f docker-compose.intel.yaml up
 ```
 
 If no models are specified, it will use the defaults:
-- Text model: `gemma-3-12b-it-qat`
-- Multimodal model: `minicpm-v-2_6`
+- Text model: `gemma-3-4b-it-qat`
+- Multimodal model: `moondream2-20250414`
 - Image model: `sd-1.5-ggml`
 
 Good (relatively small) models that have been tested are:
diff --git a/core/agent/actions.go b/core/agent/actions.go
@@ -5,6 +5,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"os"
+	"strings"
 
 	"github.com/mudler/LocalAGI/core/action"
 	"github.com/mudler/LocalAGI/core/types"
@@ -12,12 +13,24 @@ import (
 	"github.com/mudler/LocalAGI/pkg/xlog"
 
 	"github.com/sashabaranov/go-openai"
+	"github.com/sashabaranov/go-openai/jsonschema"
 )
 
+const parameterReasoningPrompt = `You are tasked with generating the optimal parameters for the action "%s". The action requires the following parameters:
+%s
+
+Your task is to:
+1. Generate the best possible values for each required parameter
+2. If the parameter requires code, provide complete, working code
+3. If the parameter requires text or documentation, provide comprehensive, well-structured content
+4. Ensure all parameters are complete and ready to be used
+
+Focus on quality and completeness. Do not explain your reasoning or analyze the action's purpose - just provide the best possible parameter values.`
+
 type decisionResult struct {
 	actionParams types.ActionParams
 	message      string
-	actioName    string
+	actionName   string
 }
 
 // decision forces the agent to take one of the available actions
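For context on the template above: the first `%s` receives the action name and the second receives the bullet list produced by `formatProperties` (added further down in this diff). A quick illustration with a hypothetical `write_file` action — the action name and parameter list are invented for the example:

```go
package main

import "fmt"

// Abbreviated copy of the template added above, for illustration only.
const parameterReasoningPrompt = `You are tasked with generating the optimal parameters for the action "%s". The action requires the following parameters:
%s
...`

func main() {
	// Hypothetical action and pre-formatted parameter list; the real code
	// derives both from act.Definition().
	fmt.Printf(parameterReasoningPrompt+"\n",
		"write_file",
		"- path: where to write the file\n- content: the file contents")
}
```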
@@ -131,7 +144,7 @@ func (a *Agent) decision(
 			a.observer.Update(*obs)
 		}
 
-		return &decisionResult{actionParams: params, actioName: msg.ToolCalls[0].Function.Name, message: msg.Content}, nil
+		return &decisionResult{actionParams: params, actionName: msg.ToolCalls[0].Function.Name, message: msg.Content}, nil
 	}
 
 	return nil, fmt.Errorf("failed to make a decision after %d attempts: %w", maxRetries, lastErr)
@@ -248,9 +261,32 @@ func (a *Agent) generateParameters(job *types.Job, pickTemplate string, act type
 
 	cc := conversation
 	if a.options.forceReasoning {
+		// First, get the LLM to reason about optimal parameter usage
+		parameterReasoningPrompt := fmt.Sprintf(parameterReasoningPrompt,
+			act.Definition().Name,
+			formatProperties(act.Definition().Properties))
+
+		// Get initial reasoning about parameters using askLLM
+		paramReasoningMsg, err := a.askLLM(job.GetContext(),
+			append(conversation, openai.ChatCompletionMessage{
+				Role:    "system",
+				Content: parameterReasoningPrompt,
+			}),
+			maxAttempts,
+		)
+		if err != nil {
+			xlog.Warn("Failed to get parameter reasoning", "error", err)
+		}
+
+		// Combine original reasoning with parameter-specific reasoning
+		enhancedReasoning := reasoning
+		if paramReasoningMsg.Content != "" {
+			enhancedReasoning = fmt.Sprintf("%s\n\nParameter Analysis:\n%s", reasoning, paramReasoningMsg.Content)
+		}
+
 		cc = append(conversation, openai.ChatCompletionMessage{
 			Role:    "system",
-			Content: fmt.Sprintf("The agent decided to use the tool %s with the following reasoning: %s", act.Definition().Name, reasoning),
+			Content: fmt.Sprintf("The agent decided to use the tool %s with the following reasoning: %s", act.Definition().Name, enhancedReasoning),
 		})
 	}
 
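One detail worth calling out in the hunk above: a failed reasoning call is deliberately non-fatal. The `askLLM` error is only logged, and an empty `paramReasoningMsg.Content` leaves the original reasoning untouched. The fallback pattern in isolation (a sketch; `combineReasoning` is not a function in the codebase):

```go
package main

import "fmt"

// combineReasoning shows the best-effort merge used above: an empty analysis
// (e.g. after a failed LLM call) keeps the base reasoning unchanged.
func combineReasoning(base, analysis string) string {
	if analysis == "" {
		return base
	}
	return fmt.Sprintf("%s\n\nParameter Analysis:\n%s", base, analysis)
}

func main() {
	fmt.Println(combineReasoning("chose the search tool", ""))                     // base only
	fmt.Println(combineReasoning("chose the search tool", "prefer short queries")) // merged
}
```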
@@ -273,6 +309,15 @@ func (a *Agent) generateParameters(job *types.Job, pickTemplate string, act type
 	return nil, fmt.Errorf("failed to generate parameters after %d attempts: %w", maxAttempts, attemptErr)
 }
 
+// Helper function to format properties for the prompt
+func formatProperties(props map[string]jsonschema.Definition) string {
+	var result strings.Builder
+	for name, prop := range props {
+		result.WriteString(fmt.Sprintf("- %s: %s\n", name, prop.Description))
+	}
+	return result.String()
+}
+
 func (a *Agent) handlePlanning(ctx context.Context, job *types.Job, chosenAction types.Action, actionParams types.ActionParams, reasoning string, pickTemplate string, conv Messages) (Messages, error) {
 	// Planning: run all the actions in sequence
 	if !chosenAction.Definition().Name.Is(action.PlanActionName) {
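Note that `formatProperties` ranges over a Go map, so parameter order in the rendered prompt can differ between calls. If deterministic prompts matter (prompt caching, snapshot tests), a sorted variant is a small change — a sketch of one option, not what this commit does:

```go
package agent

import (
	"fmt"
	"sort"
	"strings"

	"github.com/sashabaranov/go-openai/jsonschema"
)

// formatPropertiesSorted is a deterministic variant of the helper above:
// identical input always yields identical prompt text.
func formatPropertiesSorted(props map[string]jsonschema.Definition) string {
	names := make([]string, 0, len(props))
	for name := range props {
		names = append(names, name)
	}
	sort.Strings(names)

	var result strings.Builder
	for _, name := range names {
		result.WriteString(fmt.Sprintf("- %s: %s\n", name, props[name].Description))
	}
	return result.String()
}
```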
@@ -455,12 +500,12 @@ func (a *Agent) pickAction(job *types.Job, templ string, messages []openai.ChatC
 		return nil, nil, "", err
 	}
 
-	xlog.Debug(fmt.Sprintf("thought action Name: %v", thought.actioName))
-	xlog.Debug(fmt.Sprintf("thought message: %v", thought.message))
+	xlog.Debug("thought action Name", "actionName", thought.actionName)
+	xlog.Debug("thought message", "message", thought.message)
 
 	// Find the action
-	chosenAction := a.availableActions().Find(thought.actioName)
-	if chosenAction == nil || thought.actioName == "" {
+	chosenAction := a.availableActions().Find(thought.actionName)
+	if chosenAction == nil || thought.actionName == "" {
 		xlog.Debug("no answer")
 
 		// LLM replied with an answer?
@@ -496,8 +541,8 @@ func (a *Agent) pickAction(job *types.Job, templ string, messages []openai.ChatC
 	if err != nil {
 		return nil, nil, "", err
 	}
-	if thought.actioName != "" && thought.actioName != reasoningAction.Definition().Name.String() {
-		return nil, nil, "", fmt.Errorf("Expected reasoning action not: %s", thought.actioName)
+	if thought.actionName != "" && thought.actionName != reasoningAction.Definition().Name.String() {
+		return nil, nil, "", fmt.Errorf("expected reasoning action %s, got %s", reasoningAction.Definition().Name.String(), thought.actionName)
 	}
 
 	originalReasoning := ""
diff --git a/docker-compose.yaml b/docker-compose.yaml
@@ -7,8 +7,8 @@ services:
     # Image list (dockerhub): https://hub.docker.com/r/localai/localai
     image: localai/localai:master
    command:
-      - ${MODEL_NAME:-gemma-3-12b-it-qat}
-      - ${MULTIMODAL_MODEL:-minicpm-v-2_6}
+      - ${MODEL_NAME:-gemma-3-4b-it-qat}
+      - ${MULTIMODAL_MODEL:-moondream2-20250414}
       - ${IMAGE_MODEL:-sd-1.5-ggml}
       - granite-embedding-107m-multilingual
     healthcheck:
@@ -105,8 +105,8 @@ services:
       - 8080:3000
     #image: quay.io/mudler/localagi:master
     environment:
-      - LOCALAGI_MODEL=${MODEL_NAME:-gemma-3-12b-it-qat}
-      - LOCALAGI_MULTIMODAL_MODEL=${MULTIMODAL_MODEL:-minicpm-v-2_6}
+      - LOCALAGI_MODEL=${MODEL_NAME:-gemma-3-4b-it-qat}
+      - LOCALAGI_MULTIMODAL_MODEL=${MULTIMODAL_MODEL:-moondream2-20250414}
       - LOCALAGI_IMAGE_MODEL=${IMAGE_MODEL:-sd-1.5-ggml}
       - LOCALAGI_LLM_API_URL=http://localai:8080
       #- LOCALAGI_LLM_API_KEY=sk-1234567890
|
|||||||
Reference in New Issue
Block a user