diff --git a/README.md b/README.md
index 66bd398..b57a654 100644
--- a/README.md
+++ b/README.md
@@ -45,14 +45,100 @@ LocalAGI ensures your data stays exactly where you want it—on your hardware. N
 git clone https://github.com/mudler/LocalAGI
 cd LocalAGI
 
-# CPU setup
-docker compose up -f docker-compose.yml
+# CPU setup (default)
+docker compose up
 
-# GPU setup
-docker compose up -f docker-compose.gpu.yml
+# NVIDIA GPU setup
+docker compose --profile nvidia up
+
+# Intel GPU setup (for Intel Arc and integrated GPUs)
+docker compose --profile intel up
+
+# Start with a specific model (see available models at models.localai.io, or localai.io for using any model from Hugging Face)
+MODEL_NAME=gemma-3-12b-it docker compose up
+
+# NVIDIA GPU setup with custom multimodal and image models
+MODEL_NAME=gemma-3-12b-it \
+MULTIMODAL_MODEL=minicpm-v-2_6 \
+IMAGE_MODEL=flux.1-dev \
+docker compose --profile nvidia up
 ```
 
-Access your agents at `http://localhost:8080`
+Now you can access and manage your agents at [http://localhost:8080](http://localhost:8080)
+
+## 🖥️ Hardware Configurations
+
+LocalAGI supports multiple hardware configurations through Docker Compose profiles:
+
+### CPU (Default)
+- No special configuration needed
+- Runs on any system with Docker
+- Best for testing and development
+- Supports text models only
+
+### NVIDIA GPU
+- Requires an NVIDIA GPU and drivers
+- Uses CUDA for acceleration
+- Best for high-performance inference
+- Supports text, multimodal, and image generation models
+- Run with: `docker compose --profile nvidia up`
+- Default models:
+  - Text: `openthinker-7b`
+  - Multimodal: `minicpm-v-2_6`
+  - Image: `flux.1-dev`
+- Environment variables:
+  - `MODEL_NAME`: Text model to use
+  - `MULTIMODAL_MODEL`: Multimodal model to use
+  - `IMAGE_MODEL`: Image generation model to use
+  - `LOCALAI_SINGLE_ACTIVE_BACKEND`: Set to `true` to enable single active backend mode
+
+### Intel GPU
+- Supports Intel Arc and integrated GPUs
+- Uses SYCL for acceleration
+- Best for Intel-based systems
+- Supports text, multimodal, and image generation models
+- Run with: `docker compose --profile intel up`
+- Default models:
+  - Text: `openthinker-7b`
+  - Multimodal: `minicpm-v-2_6`
+  - Image: `sd-1.5-ggml`
+- Environment variables:
+  - `MODEL_NAME`: Text model to use
+  - `MULTIMODAL_MODEL`: Multimodal model to use
+  - `IMAGE_MODEL`: Image generation model to use
+  - `LOCALAI_SINGLE_ACTIVE_BACKEND`: Set to `true` to enable single active backend mode
+
+## Customize models
+
+You can customize the models used by LocalAGI by setting environment variables when running Docker Compose. For example:
+
+```bash
+# CPU with custom model
+MODEL_NAME=gemma-3-12b-it docker compose up
+
+# NVIDIA GPU with custom models
+MODEL_NAME=gemma-3-12b-it \
+MULTIMODAL_MODEL=minicpm-v-2_6 \
+IMAGE_MODEL=flux.1-dev \
+docker compose --profile nvidia up
+
+# Intel GPU with custom models
+MODEL_NAME=gemma-3-12b-it \
+MULTIMODAL_MODEL=minicpm-v-2_6 \
+IMAGE_MODEL=sd-1.5-ggml \
+docker compose --profile intel up
+```
+
+If no models are specified, the following defaults are used:
+- Text model: `openthinker-7b`
+- Multimodal model: `minicpm-v-2_6`
+- Image model: `flux.1-dev` (NVIDIA) or `sd-1.5-ggml` (Intel)
+
+Good (relatively small) models that have been tested are:
+
+- `qwen_qwq-32b` (best at coordinating agents)
+- `gemma-3-12b-it`
+- `gemma-3-27b-it`
 
 ## 🏆 Why Choose LocalAGI?
 
@@ -98,6 +184,8 @@ Explore detailed documentation including:
 
 ### Environment Configuration
 
+LocalAGI supports configuration through environment variables. Note that these variables need to be set on the `localagi` service in the docker-compose file to take effect.
+
 | Variable | What It Does |
 |----------|--------------|
 | `LOCALAGI_MODEL` | Your go-to model |
diff --git a/core/action/plan.go b/core/action/plan.go
index f1c4c5d..6f8d5b3 100644
--- a/core/action/plan.go
+++ b/core/action/plan.go
@@ -41,7 +41,7 @@ func (a *PlanAction) Plannable() bool {
 func (a *PlanAction) Definition() types.ActionDefinition {
 	return types.ActionDefinition{
 		Name:        PlanActionName,
-		Description: "Use this tool for solving complex tasks that involves calling more tools in sequence.",
+		Description: "Use this for situations that involve performing multiple actions in sequence.",
 		Properties: map[string]jsonschema.Definition{
 			"subtasks": {
 				Type: jsonschema.Array,
diff --git a/core/agent/templates.go b/core/agent/templates.go
index 5f657ea..346737b 100644
--- a/core/agent/templates.go
+++ b/core/agent/templates.go
@@ -115,7 +115,7 @@ Available Tools:
 const reSelfEvalTemplate = pickSelfTemplate
 
 const pickActionTemplate = hudTemplate + `
-Your only task is to analyze the situation and determine a goal and the best tool to use, or just a final response if we have fullfilled the goal.
+Your only task is to analyze the conversation and determine a goal and the best tool to use, or just a final response if we have fulfilled the goal.
 
 Guidelines:
 1. Review the current state, what was done already and context
diff --git a/docker-compose.gpu.intel.yaml b/docker-compose.gpu.intel.yaml
deleted file mode 100644
index e447f3a..0000000
--- a/docker-compose.gpu.intel.yaml
+++ /dev/null
@@ -1,75 +0,0 @@
-services:
-  localai:
-    # See https://localai.io/basics/container/#standard-container-images for
-    # a list of available container images (or build your own with the provided Dockerfile)
-    # Available images with CUDA, ROCm, SYCL, Vulkan
-    # Image list (quay.io): https://quay.io/repository/go-skynet/local-ai?tab=tags
-    # Image list (dockerhub): https://hub.docker.com/r/localai/localai
-    image: localai/localai:master-sycl-f32-ffmpeg-core
-    command:
-      # - rombo-org_rombo-llm-v3.0-qwen-32b # minimum suggested model
-      - openthinker-7b # (smaller)
-      - granite-embedding-107m-multilingual
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"]
-      interval: 60s
-      timeout: 10m
-      retries: 120
-    ports:
-      - 8081:8080
-    environment:
-      - DEBUG=true
-      #- LOCALAI_API_KEY=sk-1234567890
-    volumes:
-      - ./volumes/models:/build/models:cached
-      - ./volumes/images:/tmp/generated/images
-    devices:
-      # On a system with integrated GPU and an Arc 770, this is the Arc 770
-      - /dev/dri/card1
-      - /dev/dri/renderD129
-
-  localrecall:
-    image: quay.io/mudler/localrecall:main
-    ports:
-      - 8080
-    environment:
-      - COLLECTION_DB_PATH=/db
-      - EMBEDDING_MODEL=granite-embedding-107m-multilingual
-      - FILE_ASSETS=/assets
-      - OPENAI_API_KEY=sk-1234567890
-      - OPENAI_BASE_URL=http://localai:8080
-    volumes:
-      - ./volumes/localrag/db:/db
-      - ./volumes/localrag/assets/:/assets
-
-  localrecall-healthcheck:
-    depends_on:
-      localrecall:
-        condition: service_started
-    image: busybox
-    command: ["sh", "-c", "until wget -q -O - http://localrecall:8080 > /dev/null 2>&1; do echo 'Waiting for localrecall...'; sleep 1; done; echo 'localrecall is up!'"]
-
-  localagi:
-    depends_on:
-      localai:
-        condition: service_healthy
-      localrecall-healthcheck:
-        condition: service_completed_successfully
-    build:
-      context: .
-      dockerfile: Dockerfile.webui
-    ports:
-      - 8080:3000
-    image: quay.io/mudler/localagi:master
-    environment:
-      - LOCALAGI_MODEL=openthinker-7b
-      - LOCALAGI_LLM_API_URL=http://localai:8080
-      #- LOCALAGI_LLM_API_KEY=sk-1234567890
-      - LOCALAGI_LOCALRAG_URL=http://localrecall:8080
-      - LOCALAGI_STATE_DIR=/pool
-      - LOCALAGI_TIMEOUT=5m
-      - LOCALAGI_ENABLE_CONVERSATIONS_LOGGING=false
-    extra_hosts:
-      - "host.docker.internal:host-gateway"
-    volumes:
-      - ./volumes/localagi/:/pool
diff --git a/docker-compose.gpu.yaml b/docker-compose.gpu.yaml
deleted file mode 100644
index 3cf67fe..0000000
--- a/docker-compose.gpu.yaml
+++ /dev/null
@@ -1,85 +0,0 @@
-services:
-  localai:
-    # See https://localai.io/basics/container/#standard-container-images for
-    # a list of available container images (or build your own with the provided Dockerfile)
-    # Available images with CUDA, ROCm, SYCL, Vulkan
-    # Image list (quay.io): https://quay.io/repository/go-skynet/local-ai?tab=tags
-    # Image list (dockerhub): https://hub.docker.com/r/localai/localai
-    image: localai/localai:master-gpu-nvidia-cuda-12
-    command:
-      - mlabonne_gemma-3-27b-it-abliterated
-      - qwen_qwq-32b
-      # Other good alternative options:
-      # - rombo-org_rombo-llm-v3.0-qwen-32b # minimum suggested model
-      # - arcee-agent
-      - granite-embedding-107m-multilingual
-      - flux.1-dev
-      - minicpm-v-2_6
-    environment:
-      # Enable if you have a single GPU which don't fit all the models
-      - LOCALAI_SINGLE_ACTIVE_BACKEND=true
-      - DEBUG=true
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"]
-      interval: 10s
-      timeout: 20m
-      retries: 20
-    ports:
-      - 8081:8080
-    volumes:
-      - ./volumes/models:/build/models:cached
-      - ./volumes/images:/tmp/generated/images
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: 1
-              capabilities: [gpu]
-  localrecall:
-    image: quay.io/mudler/localrecall:main
-    ports:
-      - 8080
-    environment:
-      - COLLECTION_DB_PATH=/db
-      - EMBEDDING_MODEL=granite-embedding-107m-multilingual
-      - FILE_ASSETS=/assets
-      - OPENAI_API_KEY=sk-1234567890
-      - OPENAI_BASE_URL=http://localai:8080
-    volumes:
-      - ./volumes/localrag/db:/db
-      - ./volumes/localrag/assets/:/assets
-
-  localrecall-healthcheck:
-    depends_on:
-      localrecall:
-        condition: service_started
-    image: busybox
-    command: ["sh", "-c", "until wget -q -O - http://localrecall:8080 > /dev/null 2>&1; do echo 'Waiting for localrecall...'; sleep 1; done; echo 'localrecall is up!'"]
-
-  localagi:
-    depends_on:
-      localai:
-        condition: service_healthy
-      localrecall-healthcheck:
-        condition: service_completed_successfully
-    build:
-      context: .
-      dockerfile: Dockerfile.webui
-    ports:
-      - 8080:3000
-    image: quay.io/mudler/localagi:master
-    environment:
-      - LOCALAGI_MODEL=qwen_qwq-32b
-      - LOCALAGI_LLM_API_URL=http://localai:8080
-      #- LOCALAGI_LLM_API_KEY=sk-1234567890
-      - LOCALAGI_LOCALRAG_URL=http://localrecall:8080
-      - LOCALAGI_STATE_DIR=/pool
-      - LOCALAGI_TIMEOUT=5m
-      - LOCALAGI_ENABLE_CONVERSATIONS_LOGGING=false
-      - LOCALAGI_MULTIMODAL_MODEL=minicpm-v-2_6
-      - LOCALAGI_IMAGE_MODEL=flux.1-dev
-    extra_hosts:
-      - "host.docker.internal:host-gateway"
-    volumes:
-      - ./volumes/localagi/:/pool
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 60963ba..101ebf7 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -24,14 +24,44 @@ services:
       - ./volumes/models:/build/models:cached
       - ./volumes/images:/tmp/generated/images
 
-    # decomment the following piece if running with Nvidia GPUs
-    # deploy:
-    #   resources:
-    #     reservations:
-    #       devices:
-    #         - driver: nvidia
-    #           count: 1
-    #           capabilities: [gpu]
+  localai-nvidia:
+    profiles: ["nvidia"]
+    extends:
+      service: localai
+    environment:
+      - LOCALAI_SINGLE_ACTIVE_BACKEND=true
+      - DEBUG=true
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+    command:
+      - ${MODEL_NAME:-openthinker-7b}
+      - ${MULTIMODAL_MODEL:-minicpm-v-2_6}
+      - ${IMAGE_MODEL:-flux.1-dev}
+      - granite-embedding-107m-multilingual
+
+  localai-intel:
+    profiles: ["intel"]
+    environment:
+      - LOCALAI_SINGLE_ACTIVE_BACKEND=true
+      - DEBUG=true
+    extends:
+      service: localai
+    image: localai/localai:master-sycl-f32-ffmpeg-core
+    devices:
+      # On a system with integrated GPU and an Arc 770, this is the Arc 770
+      - /dev/dri/card1
+      - /dev/dri/renderD129
+    command:
+      - ${MODEL_NAME:-openthinker-7b}
+      - ${MULTIMODAL_MODEL:-minicpm-v-2_6}
+      - ${IMAGE_MODEL:-sd-1.5-ggml}
+      - granite-embedding-107m-multilingual
+
   localrecall:
     image: quay.io/mudler/localrecall:main
     ports:
@@ -77,3 +107,31 @@
       - "host.docker.internal:host-gateway"
     volumes:
       - ./volumes/localagi/:/pool
+
+  localagi-nvidia:
+    profiles: ["nvidia"]
+    extends:
+      service: localagi
+    environment:
+      - LOCALAGI_MODEL=${MODEL_NAME:-openthinker-7b}
+      - LOCALAGI_MULTIMODAL_MODEL=${MULTIMODAL_MODEL:-minicpm-v-2_6}
+      - LOCALAGI_IMAGE_MODEL=${IMAGE_MODEL:-flux.1-dev}
+      - LOCALAGI_LLM_API_URL=http://localai:8080
+      - LOCALAGI_LOCALRAG_URL=http://localrecall:8080
+      - LOCALAGI_STATE_DIR=/pool
+      - LOCALAGI_TIMEOUT=5m
+      - LOCALAGI_ENABLE_CONVERSATIONS_LOGGING=false
+
+  localagi-intel:
+    profiles: ["intel"]
+    extends:
+      service: localagi
+    environment:
+      - LOCALAGI_MODEL=${MODEL_NAME:-openthinker-7b}
+      - LOCALAGI_MULTIMODAL_MODEL=${MULTIMODAL_MODEL:-minicpm-v-2_6}
+      - LOCALAGI_IMAGE_MODEL=${IMAGE_MODEL:-sd-1.5-ggml}
+      - LOCALAGI_LLM_API_URL=http://localai:8080
+      - LOCALAGI_LOCALRAG_URL=http://localrecall:8080
+      - LOCALAGI_STATE_DIR=/pool
+      - LOCALAGI_TIMEOUT=5m
+      - LOCALAGI_ENABLE_CONVERSATIONS_LOGGING=false
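
Note on the README's environment-variable requirement above: a minimal sketch of one way to set `LOCALAGI_*` variables on the `localagi` container without editing the tracked compose file is a `docker-compose.override.yaml`, which Docker Compose merges automatically. The variable names below come from this diff; the values, and the choice to use an override file at all, are illustrative only.

```yaml
# docker-compose.override.yaml (illustrative sketch, not part of this diff).
# Docker Compose merges this file with docker-compose.yaml by default, so these
# variables end up on the localagi service as the README note requires.
services:
  localagi:
    environment:
      - LOCALAGI_MODEL=gemma-3-12b-it              # example model from the README
      - LOCALAGI_TIMEOUT=10m                       # assumed value, adjust as needed
      - LOCALAGI_ENABLE_CONVERSATIONS_LOGGING=true # assumed value
```

When running with the `nvidia` or `intel` profiles, the same variables would go on `localagi-nvidia` or `localagi-intel` instead, since those are the services started by the profiles.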
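The `localai-intel` service pins `/dev/dri/card1` and `/dev/dri/renderD129`, which, per the in-file comment, matches a machine with an integrated GPU plus an Arc 770. On other hosts the Intel GPU may be exposed under different DRM nodes, so the `devices:` entries in `docker-compose.yaml` may need editing. The sketch below assumes the Intel GPU is the first DRM device (`card0`/`renderD128`); verify the actual nodes with `ls /dev/dri` on the host.

```yaml
# Assumed device paths for a host where the Intel GPU is the first DRM device;
# substitute the card/renderD nodes reported by `ls /dev/dri` for your GPU.
services:
  localai-intel:
    devices:
      - /dev/dri/card0
      - /dev/dri/renderD128
```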