diff --git a/Makefile b/Makefile
index 9c978c7..3ddd7f3 100644
--- a/Makefile
+++ b/Makefile
@@ -11,7 +11,7 @@ cleanup-tests:
 	docker compose down
 
 tests: prepare-tests
-	LOCALAGI_MCPBOX_URL="http://localhost:9090" LOCALAGI_MODEL="gemma-3-12b-it-qat" LOCALAI_API_URL="http://localhost:8081" LOCALAGI_API_URL="http://localhost:8080" $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --fail-fast -v -r ./...
+	LOCALAGI_MCPBOX_URL="http://localhost:9090" LOCALAGI_MODEL="gemma-3-4b-it-qat" LOCALAI_API_URL="http://localhost:8081" LOCALAGI_API_URL="http://localhost:8080" $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --fail-fast -v -r ./...
 
 run-nokb:
 	$(MAKE) run KBDISABLEINDEX=true
diff --git a/README.md b/README.md
index 2b549e8..19292d3 100644
--- a/README.md
+++ b/README.md
@@ -63,7 +63,7 @@ MODEL_NAME=gemma-3-12b-it docker compose up
 
 # NVIDIA GPU setup with custom multimodal and image models
 MODEL_NAME=gemma-3-12b-it \
-MULTIMODAL_MODEL=minicpm-v-2_6 \
+MULTIMODAL_MODEL=moondream2-20250414 \
 IMAGE_MODEL=flux.1-dev-ggml \
 docker compose -f docker-compose.nvidia.yaml up
 ```
@@ -126,8 +126,8 @@ LocalAGI supports multiple hardware configurations through Docker Compose profil
 - Supports text, multimodal, and image generation models
 - Run with: `docker compose -f docker-compose.nvidia.yaml up`
 - Default models:
-  - Text: `gemma-3-12b-it-qat`
-  - Multimodal: `minicpm-v-2_6`
+  - Text: `gemma-3-4b-it-qat`
+  - Multimodal: `moondream2-20250414`
   - Image: `sd-1.5-ggml`
 - Environment variables:
   - `MODEL_NAME`: Text model to use
@@ -142,8 +142,8 @@ LocalAGI supports multiple hardware configurations through Docker Compose profil
 - Supports text, multimodal, and image generation models
 - Run with: `docker compose -f docker-compose.intel.yaml up`
 - Default models:
-  - Text: `gemma-3-12b-it-qat`
-  - Multimodal: `minicpm-v-2_6`
+  - Text: `gemma-3-4b-it-qat`
+  - Multimodal: `moondream2-20250414`
   - Image: `sd-1.5-ggml`
 - Environment variables:
   - `MODEL_NAME`: Text model to use
@@ -161,20 +161,20 @@ MODEL_NAME=gemma-3-12b-it docker compose up
 
 # NVIDIA GPU with custom models
 MODEL_NAME=gemma-3-12b-it \
-MULTIMODAL_MODEL=minicpm-v-2_6 \
+MULTIMODAL_MODEL=moondream2-20250414 \
 IMAGE_MODEL=flux.1-dev-ggml \
 docker compose -f docker-compose.nvidia.yaml up
 
 # Intel GPU with custom models
 MODEL_NAME=gemma-3-12b-it \
-MULTIMODAL_MODEL=minicpm-v-2_6 \
+MULTIMODAL_MODEL=moondream2-20250414 \
 IMAGE_MODEL=sd-1.5-ggml \
 docker compose -f docker-compose.intel.yaml up
 ```
 
 If no models are specified, it will use the defaults:
-- Text model: `gemma-3-12b-it-qat`
-- Multimodal model: `minicpm-v-2_6`
+- Text model: `gemma-3-4b-it-qat`
+- Multimodal model: `moondream2-20250414`
 - Image model: `sd-1.5-ggml`
 
 Good (relatively small) models that have been tested are:
diff --git a/core/agent/actions.go b/core/agent/actions.go
index 077bb35..5a4aadf 100644
--- a/core/agent/actions.go
+++ b/core/agent/actions.go
@@ -5,6 +5,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"os"
+	"strings"
 
 	"github.com/mudler/LocalAGI/core/action"
 	"github.com/mudler/LocalAGI/core/types"
@@ -12,12 +13,24 @@ import (
 	"github.com/mudler/LocalAGI/pkg/xlog"
 
 	"github.com/sashabaranov/go-openai"
+	"github.com/sashabaranov/go-openai/jsonschema"
 )
 
+const parameterReasoningPrompt = `You are tasked with generating the optimal parameters for the action "%s". The action requires the following parameters:
+%s
+
+Your task is to:
+1. Generate the best possible values for each required parameter
+2. If the parameter requires code, provide complete, working code
+3. If the parameter requires text or documentation, provide comprehensive, well-structured content
+4. Ensure all parameters are complete and ready to be used
+
+Focus on quality and completeness. Do not explain your reasoning or analyze the action's purpose - just provide the best possible parameter values.`
+
 type decisionResult struct {
 	actionParams types.ActionParams
 	message      string
-	actioName    string
+	actionName   string
 }
 
 // decision forces the agent to take one of the available actions
@@ -131,7 +144,7 @@
 			a.observer.Update(*obs)
 		}
 
-		return &decisionResult{actionParams: params, actioName: msg.ToolCalls[0].Function.Name, message: msg.Content}, nil
+		return &decisionResult{actionParams: params, actionName: msg.ToolCalls[0].Function.Name, message: msg.Content}, nil
 	}
 
 	return nil, fmt.Errorf("failed to make a decision after %d attempts: %w", maxRetries, lastErr)
@@ -248,9 +261,32 @@ func (a *Agent) generateParameters(job *types.Job, pickTemplate string, act type
 	cc := conversation
 
 	if a.options.forceReasoning {
+		// First, get the LLM to reason about optimal parameter usage
+		parameterReasoningPrompt := fmt.Sprintf(parameterReasoningPrompt,
+			act.Definition().Name,
+			formatProperties(act.Definition().Properties))
+
+		// Get initial reasoning about parameters using askLLM
+		paramReasoningMsg, err := a.askLLM(job.GetContext(),
+			append(conversation, openai.ChatCompletionMessage{
+				Role:    "system",
+				Content: parameterReasoningPrompt,
+			}),
+			maxAttempts,
+		)
+		if err != nil {
+			xlog.Warn("Failed to get parameter reasoning", "error", err)
+		}
+
+		// Combine original reasoning with parameter-specific reasoning
+		enhancedReasoning := reasoning
+		if paramReasoningMsg.Content != "" {
+			enhancedReasoning = fmt.Sprintf("%s\n\nParameter Analysis:\n%s", reasoning, paramReasoningMsg.Content)
+		}
+
 		cc = append(conversation, openai.ChatCompletionMessage{
 			Role:    "system",
-			Content: fmt.Sprintf("The agent decided to use the tool %s with the following reasoning: %s", act.Definition().Name, reasoning),
+			Content: fmt.Sprintf("The agent decided to use the tool %s with the following reasoning: %s", act.Definition().Name, enhancedReasoning),
 		})
 	}
 
@@ -273,6 +309,15 @@ func (a *Agent) generateParameters(job *types.Job, pickTemplate string, act type
 	return nil, fmt.Errorf("failed to generate parameters after %d attempts: %w", maxAttempts, attemptErr)
 }
 
+// Helper function to format properties for the prompt
+func formatProperties(props map[string]jsonschema.Definition) string {
+	var result strings.Builder
+	for name, prop := range props {
+		result.WriteString(fmt.Sprintf("- %s: %s\n", name, prop.Description))
+	}
+	return result.String()
+}
+
 func (a *Agent) handlePlanning(ctx context.Context, job *types.Job, chosenAction types.Action, actionParams types.ActionParams, reasoning string, pickTemplate string, conv Messages) (Messages, error) {
 	// Planning: run all the actions in sequence
 	if !chosenAction.Definition().Name.Is(action.PlanActionName) {
@@ -455,12 +500,12 @@ func (a *Agent) pickAction(job *types.Job, templ string, messages []openai.ChatC
 		return nil, nil, "", err
 	}
 
-	xlog.Debug(fmt.Sprintf("thought action Name: %v", thought.actioName))
-	xlog.Debug(fmt.Sprintf("thought message: %v", thought.message))
+	xlog.Debug("thought action Name", "actionName", thought.actionName)
+	xlog.Debug("thought message", "message", thought.message)
 
 	// Find the action
-	chosenAction := a.availableActions().Find(thought.actioName)
-	if chosenAction == nil || thought.actioName == "" {
+	chosenAction := a.availableActions().Find(thought.actionName)
+	if chosenAction == nil || thought.actionName == "" {
 		xlog.Debug("no answer")
 
 		// LLM replied with an answer?
@@ -496,8 +541,8 @@ func (a *Agent) pickAction(job *types.Job, templ string, messages []openai.ChatC
 		if err != nil {
 			return nil, nil, "", err
 		}
-		if thought.actioName != "" && thought.actioName != reasoningAction.Definition().Name.String() {
-			return nil, nil, "", fmt.Errorf("Expected reasoning action not: %s", thought.actioName)
+		if thought.actionName != "" && thought.actionName != reasoningAction.Definition().Name.String() {
+			return nil, nil, "", fmt.Errorf("expected reasoning action %s, got %s", reasoningAction.Definition().Name.String(), thought.actionName)
 		}
 
 		originalReasoning := ""
diff --git a/docker-compose.yaml b/docker-compose.yaml
index b4936e2..64ab018 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -7,8 +7,8 @@ services:
     # Image list (dockerhub): https://hub.docker.com/r/localai/localai
    image: localai/localai:master
     command:
-      - ${MODEL_NAME:-gemma-3-12b-it-qat}
-      - ${MULTIMODAL_MODEL:-minicpm-v-2_6}
+      - ${MODEL_NAME:-gemma-3-4b-it-qat}
+      - ${MULTIMODAL_MODEL:-moondream2-20250414}
       - ${IMAGE_MODEL:-sd-1.5-ggml}
       - granite-embedding-107m-multilingual
     healthcheck:
@@ -105,8 +105,8 @@ services:
       - 8080:3000
     #image: quay.io/mudler/localagi:master
     environment:
-      - LOCALAGI_MODEL=${MODEL_NAME:-gemma-3-12b-it-qat}
-      - LOCALAGI_MULTIMODAL_MODEL=${MULTIMODAL_MODEL:-minicpm-v-2_6}
+      - LOCALAGI_MODEL=${MODEL_NAME:-gemma-3-4b-it-qat}
+      - LOCALAGI_MULTIMODAL_MODEL=${MULTIMODAL_MODEL:-moondream2-20250414}
       - LOCALAGI_IMAGE_MODEL=${IMAGE_MODEL:-sd-1.5-ggml}
       - LOCALAGI_LLM_API_URL=http://localai:8080
       #- LOCALAGI_LLM_API_KEY=sk-1234567890
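
For reference, a minimal standalone sketch (not part of the patch) of how the new parameter-reasoning prompt is assembled from an action's JSON schema. parameterReasoningPrompt and formatProperties mirror the additions to core/agent/actions.go; the "create_file" action and its properties are hypothetical, chosen only for illustration.

// Standalone illustration (not part of the patch): building the
// parameter-reasoning system message from an action's schema.
// The "create_file" action and its two properties are made up.
package main

import (
	"fmt"
	"strings"

	"github.com/sashabaranov/go-openai/jsonschema"
)

const parameterReasoningPrompt = `You are tasked with generating the optimal parameters for the action "%s". The action requires the following parameters:
%s

Your task is to:
1. Generate the best possible values for each required parameter
2. If the parameter requires code, provide complete, working code
3. If the parameter requires text or documentation, provide comprehensive, well-structured content
4. Ensure all parameters are complete and ready to be used

Focus on quality and completeness. Do not explain your reasoning or analyze the action's purpose - just provide the best possible parameter values.`

// Same shape as the formatProperties helper introduced by the patch:
// one "- name: description" line per schema property.
func formatProperties(props map[string]jsonschema.Definition) string {
	var result strings.Builder
	for name, prop := range props {
		result.WriteString(fmt.Sprintf("- %s: %s\n", name, prop.Description))
	}
	return result.String()
}

func main() {
	// Hypothetical action schema; only Description is used by formatProperties.
	props := map[string]jsonschema.Definition{
		"filename": {Type: jsonschema.String, Description: "Path of the file to create"},
		"content":  {Type: jsonschema.String, Description: "Full text to write into the file"},
	}
	// This is the string sent as an extra system message before asking the
	// model for tool-call parameters when forceReasoning is enabled.
	fmt.Print(fmt.Sprintf(parameterReasoningPrompt, "create_file", formatProperties(props)))
}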