This commit is contained in:
mudler
2023-08-01 22:47:31 +02:00
parent 1263b2bf8a
commit 655e95508a
6 changed files with 185 additions and 57 deletions

.env

@@ -1,16 +1,24 @@
# Enable debug mode in the LocalAI API
DEBUG=true
# Where models are stored
MODELS_PATH=/models
# Galleries to use
GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]
# 30b setup
PRELOAD_MODELS=[{"id":"huggingface@thebloke/gplatty-30b-ggml/gplatty-30b.ggmlv3.q2_k.bin","name":"gpt-4","overrides":{"context_size":4096,"mmap":true,"f16":true,"mirostat":2,"mirostat_tau":5,"mirostat_eta":0.1,"parameters":{"temperature":0.1,"top_k":40,"top_p":0.95}}},{"id":"model-gallery@stablediffusion"},{"id":"model-gallery@voice-en-us-kathleen-low"},{"url":"github:go-skynet/model-gallery/base.yaml","name":"all-MiniLM-L6-v2","overrides":{"embeddings":true,"backend":"huggingface-embeddings","parameters":{"model":"all-MiniLM-L6-v2"}}},{"id":"huggingface@thebloke/gplatty-30b-ggml/gplatty-30b.ggmlv3.q2_k.bin","name":"functions","overrides":{"context_size":4096,"mirostat":2,"mirostat_tau":5,"mirostat_eta":0.1,"template":{"chat":"","completion":""},"roles":{"assistant":"ASSISTANT:","system":"SYSTEM:","assistant_function_call":"FUNCTION_CALL:","function":"FUNCTION CALL RESULT:"},"parameters":{"temperature":0.1,"top_k":40,"top_p":0.95},"function":{"disable_no_action":true},"mmap":true,"f16":true}}]
# 13b setup
# PRELOAD_MODELS=[{"id":"huggingface@thebloke/wizardlm-13b-v1.0-uncensored-ggml/wizardlm-13b-v1.0-uncensored.ggmlv3.q4_k_m.bin","name":"gpt-4","overrides":{"context_size":2048,"mmap":true,"f16":true,"mirostat":2,"mirostat_tau":5,"mirostat_eta":0.1,"parameters":{"temperature":0.1,"top_k":40,"top_p":0.95}}},{"id":"model-gallery@stablediffusion"},{"id":"model-gallery@voice-en-us-kathleen-low"},{"url":"github:go-skynet/model-gallery/base.yaml","name":"all-MiniLM-L6-v2","overrides":{"embeddings":true,"backend":"huggingface-embeddings","parameters":{"model":"all-MiniLM-L6-v2"}}},{"id":"huggingface@thebloke/wizardlm-13b-v1.0-uncensored-ggml/wizardlm-13b-v1.0-uncensored.ggmlv3.q4_0.bin","name":"functions","overrides":{"context_size":2048,"mirostat":2,"mirostat_tau":5,"mirostat_eta":0.1,"template":{"chat":"","completion":""},"roles":{"assistant":"ASSISTANT:","system":"SYSTEM:","assistant_function_call":"FUNCTION_CALL:","function":"FUNCTION CALL RESULT:"},"parameters":{"temperature":0.1,"top_k":40,"top_p":0.95},"function":{"disable_no_action":true},"mmap":true,"f16":true}}]
# Select model configuration in the config directory
PRELOAD_MODELS_CONFIG=/config/wizardlm-13b.yaml
#PRELOAD_MODELS_CONFIG=/config/preload-models.yaml
# 30b superhot setup
#PRELOAD_MODELS=[{"id":"huggingface@thebloke/gplatty-30b-superhot-8k-ggml/gplatty-30b-superhot-8k.ggmlv3.q2_k.bin","name":"gpt-4","overrides":{"context_size":8192,"mmap":true,"f16":true,"mirostat":2,"mirostat_tau":5,"mirostat_eta":0.1,"parameters":{"temperature":0.1,"top_k":40,"top_p":0.95,"rope_freq_scale":0.25}}},{"id":"model-gallery@stablediffusion"},{"id":"model-gallery@voice-en-us-kathleen-low"},{"url":"github:go-skynet/model-gallery/base.yaml","name":"all-MiniLM-L6-v2","overrides":{"embeddings":true,"backend":"huggingface-embeddings","parameters":{"model":"all-MiniLM-L6-v2"}}},{"id":"huggingface@thebloke/gplatty-30b-superhot-8k-ggml/gplatty-30b-superhot-8k.ggmlv3.q2_k.bin","name":"functions","overrides":{"context_size":8192,"mirostat":2,"mirostat_tau":5,"mirostat_eta":0.1,"template":{"chat":"","completion":""},"roles":{"assistant":"ASSISTANT:","system":"SYSTEM:","assistant_function_call":"FUNCTION_CALL:","function":"FUNCTION CALL RESULT:"},"parameters":{"temperature":0.1,"top_k":40,"top_p":0.95,"rope_freq_scale":0.25},"function":{"disable_no_action":true},"mmap":true,"f16":true}}]
# You don't need to set a valid OpenAI key; however, the Python libraries expect
# the variable to be non-empty or they will raise an error
OPENAI_API_KEY=sk---
# Set the OpenAI API base URL to point to LocalAI
OPENAI_API_BASE=http://api:8080
# Set an image path
IMAGE_PATH=/tmp
# Set number of default threads
THREADS=14
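
For reference, here is a minimal sketch of how the two OpenAI variables above are typically consumed on the Python side (assuming the pre-1.0 `openai` client that `main.py` already uses; the key is only a placeholder and is never validated by LocalAI):

```python
import os
import openai

# Point the client at LocalAI instead of api.openai.com; the key only needs
# to be a non-empty string, it is not checked by LocalAI.
openai.api_key = os.environ.get("OPENAI_API_KEY", "sk---")
openai.api_base = os.environ.get("OPENAI_API_BASE", "http://api:8080")

# "gpt-4" resolves to the model registered under that name in PRELOAD_MODELS.
response = openai.ChatCompletion.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response["choices"][0]["message"]["content"])
```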


@@ -13,7 +13,7 @@ Note: this is a fun project, not a serious one. Be warned!
It is a dead simple experiment to show how to tie the various LocalAI functionalities to create a virtual assistant that can do tasks. It is simple on purpose, trying to be minimalistic and easy to understand and customize.
It is different from babyAGI or AutoGPT as it uses [OpenAI functions](https://openai.com/blog/function-calling-and-other-api-updates), but locally with [LocalAI](https://localai.io) (no API keys needed!)
It is different from babyAGI or AutoGPT as it uses [OpenAI functions](https://openai.com/blog/function-calling-and-other-api-updates) - it is a from-scratch attempt built on purpose to run locally with [LocalAI](https://localai.io) (no API keys needed!) instead of relying on expensive cloud services.
## Quick start
@@ -33,6 +33,12 @@ Ask it to:
-> and watch it engaging into dialogues with long-term memory
- "I want you to act as a marketing and sales guy in a startup company. I want you to come up with a plan to support our new latest project, XXX, which is an open source project. you are free to come up with creative ideas to engage and attract new people to the project. The XXX project is XXX."
### Caveats
The "goodness" of a model has a big impact on how μAGI works. Currently `13b` models are powerful enough to actually able to perform multi-step tasks or do more actions. However, it is quite slow when running on CPU (no big surprise here).
The context size is a limitation - you can find in the `config` examples to run with superhot 8k context size, but the quality is not good enough to perform complex tasks.
### How does it work?
`microAGI` does just the minimum around LocalAI functions to create a virtual assistant that can perform generic tasks. It works through an endless loop of `intent detection`, `function invocation`, `self-evaluation` and `reply generation` (if it decides to reply! :)). The agent can plan complex tasks by invoking multiple functions, and it remembers things from the conversation.
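
As a rough illustration of that loop, here is a simplified sketch (the real logic lives in `evaluate()`, `needs_to_do_action()` and `process_functions()` in `main.py`; the stub helpers below are hypothetical stand-ins):

```python
REPLY_ACTION = "reply"

def detect_intent(user_input, agent_actions):
    """Stub for intent detection: ask the LLM which action, if any, is needed."""
    return {"action": REPLY_ACTION, "reasoning": "no tool call required"}

def invoke_function(request, action, agent_actions):
    """Stub for function invocation: run the chosen action, return result messages."""
    return []

def generate_reply(history):
    """Stub for reply generation: produce the final assistant answer."""
    return "done"

def evaluate(user_input, history, agent_actions, re_evaluate=True):
    action = detect_intent(user_input, agent_actions)        # intent detection
    if action["action"] != REPLY_ACTION:
        history += invoke_function(action["reasoning"],      # function invocation
                                   action["action"], agent_actions)
        if re_evaluate:                                      # self-evaluation
            return evaluate(user_input, history, agent_actions, re_evaluate=False)
    history.append({"role": "assistant",
                    "content": generate_reply(history)})     # reply generation
    return history

print(evaluate("remember that I like espresso", [], {}))
```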


@@ -1,5 +1,6 @@
- id: huggingface@thebloke/gplatty-30b-ggml/gplatty-30b.ggmlv3.q2_k.bin
name: "gpt-4"
- id: huggingface@thebloke/wizardlm-30b-uncensored-ggml/wizardlm-30b-uncensored.ggmlv3.q2_k.bin
galleryModel:
name: "gpt-4"
overrides:
context_size: 4096
mmap: true
@@ -20,7 +21,7 @@
backend: huggingface-embeddings
parameters:
model: all-MiniLM-L6-v2
- id: huggingface@thebloke/gplatty-30b-ggml/gplatty-30b.ggmlv3.q2_k.bin
- id: huggingface@thebloke/wizardlm-30b-uncensored-ggml/wizardlm-30b-uncensored.ggmlv3.q2_k.bin
name: functions
overrides:
context_size: 4096

main.py

@@ -21,34 +21,42 @@ import os
# Parse arguments such as system prompt and batch mode
import argparse
parser = argparse.ArgumentParser(description='microAGI')
parser = argparse.ArgumentParser(description='μAGI')
parser.add_argument('--system-prompt', dest='system_prompt', action='store',
help='System prompt to use')
parser.add_argument('--batch-mode', dest='batch_mode', action='store_true', default=False,
help='Batch mode')
parser.add_argument('--prompt', dest='prompt', action='store', default=False,
help='Prompt mode')
# skip avatar creation
parser.add_argument('--skip-avatar', dest='skip_avatar', action='store_true', default=False,
help='Skip avatar creation')
# Reevaluate
parser.add_argument('--re-evaluate', dest='re_evaluate', action='store_true', default=False,
help='Reevaluate if another action is needed or we have completed the user request')
# Postprocess
parser.add_argument('--postprocess', dest='postprocess', action='store_true', default=False,
help='Postprocess the reasoning')
# Subtask context
parser.add_argument('--subtask-context', dest='subtaskContext', action='store_true', default=False,
help='Include context in subtasks')
args = parser.parse_args()
FUNCTIONS_MODEL = os.environ.get("FUNCTIONS_MODEL", "functions")
EMBEDDINGS_MODEL = os.environ.get("EMBEDDINGS_MODEL", "all-MiniLM-L6-v2")
LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4")
VOICE_MODEL= os.environ.get("TTS_MODEL","en-us-kathleen-low.onnx")
DEFAULT_SD_MODEL = os.environ.get("DEFAULT_SD_MODEL", "stablediffusion")
DEFAULT_SD_PROMPT = os.environ.get("DEFAULT_SD_PROMPT", "floating hair, portrait, ((loli)), ((one girl)), cute face, hidden hands, asymmetrical bangs, beautiful detailed eyes, eye shadow, hair ornament, ribbons, bowties, buttons, pleated skirt, (((masterpiece))), ((best quality)), colorful|((part of the head)), ((((mutated hands and fingers)))), deformed, blurry, bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, poorly drawn hands, missing limb, blurry, floating limbs, disconnected limbs, malformed hands, blur, out of focus, long neck, long body, Octane renderer, lowres, bad anatomy, bad hands, text")
PERSISTENT_DIR = os.environ.get("PERSISTENT_DIR", "/data")
## Constants
REPLY_ACTION = "reply"
PLAN_ACTION = "plan"
#embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
embeddings = LocalAIEmbeddings(model="all-MiniLM-L6-v2")
PLAN_ACTION = "multitask_action"
embeddings = LocalAIEmbeddings(model=EMBEDDINGS_MODEL)
chroma_client = Chroma(collection_name="memories", persist_directory="db", embedding_function=embeddings)
# Function to create images with OpenAI
# Function to create images with LocalAI
def display_avatar(input_text=DEFAULT_SD_PROMPT, model=DEFAULT_SD_MODEL):
response = openai.Image.create(
prompt=input_text,
@@ -61,6 +69,7 @@ def display_avatar(input_text=DEFAULT_SD_PROMPT, model=DEFAULT_SD_MODEL):
my_art = AsciiArt.from_url(image_url)
my_art.to_terminal()
# Function to create audio with LocalAI
def tts(input_text, model=VOICE_MODEL):
# strip newlines from text
input_text = input_text.replace("\n", ".")
@@ -88,12 +97,11 @@ def tts(input_text, model=VOICE_MODEL):
# remove the audio file
os.remove(output_file_path)
# Function to analyze the user input and pick the next action to do
def needs_to_do_action(user_input,agent_actions={}):
# Get the descriptions and the actions name (the keys)
descriptions=""
for action in agent_actions:
descriptions+=agent_actions[action]["description"]+"\n"
descriptions=action_description("", agent_actions)
messages = [
{"role": "user",
@@ -151,11 +159,35 @@ Function call: """
return res
return {"action": REPLY_ACTION}
# This is used to collect the descriptions of the agent actions, used to populate the LLM prompt
def action_description(action, agent_actions):
descriptions=""
# generate descriptions of actions that the agent can pick
for a in agent_actions:
if ( action != "" and action == a ) or (action == ""):
descriptions+=agent_actions[a]["description"]+"\n"
return descriptions
## This function is called to ask the user whether they agree with the action to take, before executing it
def ask_user_confirmation(action_name, action_parameters):
logger.info("==> Ask user confirmation")
logger.info("==> action_name: {action_name}", action_name=action_name)
logger.info("==> action_parameters: {action_parameters}", action_parameters=action_parameters)
# Ask via stdin
logger.info("==> Do you want to execute the action? (y/n)")
user_input = input()
if user_input == "y":
logger.info("==> Executing action")
return True
else:
logger.info("==> Skipping action")
return False
### This function is used to process the functions given a user input.
### It picks a function, executes it and returns the list of messages containing the result.
def process_functions(user_input, action="", agent_actions={}):
descriptions=""
for a in agent_actions:
descriptions+=agent_actions[a]["description"]+"\n"
descriptions=action_description(action, agent_actions)
messages = [
# {"role": "system", "content": "You are a helpful assistant."},
@@ -179,6 +211,7 @@ Function call: """
logger.info("==> function parameters: ")
logger.info(function_parameters)
function_to_call = agent_actions[function_name]["function"]
function_result = function_to_call(function_parameters, agent_actions=agent_actions)
logger.info("==> function result: ")
logger.info(function_result)
@@ -198,6 +231,7 @@ Function call: """
)
return messages, function_result
### function_completion is used to autocomplete functions given a list of messages
def function_completion(messages, action="", agent_actions={}):
function_call = "auto"
if action != "":
@@ -223,7 +257,8 @@ def function_completion(messages, action="", agent_actions={}):
return response
# Gets the content of each message in the history
# Rework the content of each message in the history in a way that is understandable by the LLM
# TODO: switch to templates (?)
def process_history(conversation_history):
messages = ""
for message in conversation_history:
@@ -240,8 +275,10 @@ def process_history(conversation_history):
messages+="Assistant message: "+message["content"]+"\n"
return messages
def evaluate(user_input, conversation_history = [],re_evaluate=False, agent_actions={},re_evaluation_in_progress=False):
### Main evaluate function
### This function evaluates in a continuous loop the user input and the conversation history.
### It returns the conversation history with the latest response from the assistant.
def evaluate(user_input, conversation_history = [],re_evaluate=False, agent_actions={},re_evaluation_in_progress=False, postprocess=False, subtaskContext=False):
messages = [
{
@@ -272,11 +309,14 @@ def evaluate(user_input, conversation_history = [],re_evaluate=False, agent_acti
action = {"action": REPLY_ACTION}
if action["action"] != REPLY_ACTION:
logger.info("==> microAGI wants to call '{action}'", action=action["action"])
logger.info("==> μAGI wants to call '{action}'", action=action["action"])
logger.info("==> Reasoning '{reasoning}'", reasoning=action["reasoning"])
if action["action"] == PLAN_ACTION:
logger.info("==> It's a plan <==: ")
if postprocess:
action["reasoning"] = post_process(action["reasoning"])
#function_completion_message = "Conversation history:\n"+old_history+"\n"+
function_completion_message = "Request: "+user_input+"\nReasoning: "+action["reasoning"]
responses, function_results = process_functions(function_completion_message, action=action["action"], agent_actions=agent_actions)
@@ -291,18 +331,33 @@ def evaluate(user_input, conversation_history = [],re_evaluate=False, agent_acti
logger.info(subtask)
#ctr="Context: "+user_input+"\nThought: "+action["reasoning"]+ "\nRequest: "+subtask["reasoning"]
cr="Context: "+user_input+"\n"
if subtask_result != "":
#cr=""
if subtask_result != "" and subtaskContext:
# Include cumulative results of previous subtasks
# TODO: this grows context, maybe we should use a different approach or summarize
cr+="Subtask results: "+subtask_result+"\n"
cr+="Request: "+subtask["reasoning"]
if postprocess:
cr+= "Subtask results: "+post_process(subtask_result)+"\n"
else:
cr+="Subtask results: "+subtask_result+"\n"
if postprocess:
cr+= "Request: "+post_process(subtask["reasoning"])
else:
cr+= "Request: "+subtask["reasoning"]
subtask_response, function_results = process_functions(cr, subtask["function"],agent_actions=agent_actions)
subtask_result+=process_history(subtask_response)
responses.extend(subtask_response)
if re_evaluate:
## Needs better output handling, or this can loop indefinitely..
logger.info("-> Re-evaluate if another action is needed")
responses = evaluate(user_input+"\n Conversation history: \n"+process_history(responses[1:]), responses, re_evaluate,agent_actions=agent_actions,re_evaluation_in_progress=True)
## ? conversation history should go after the user_input maybe?
re_eval = user_input +"\n"
re_eval += "Conversation history: \n"
if postprocess:
re_eval+= post_process(process_history(responses[1:])) +"\n"
else:
re_eval+= process_history(responses[1:]) +"\n"
responses = evaluate(re_eval, responses, re_evaluate,agent_actions=agent_actions,re_evaluation_in_progress=True)
if re_evaluation_in_progress:
conversation_history.extend(responses)
@@ -337,8 +392,8 @@ def evaluate(user_input, conversation_history = [],re_evaluate=False, agent_acti
logger.info("==> no action needed")
if re_evaluation_in_progress:
logger.info("==> microAGI has completed the user request")
logger.info("==> microAGI will reply to the user")
logger.info("==> μAGI has completed the user request")
logger.info("==> μAGI will reply to the user")
return conversation_history
# get the response from the model
@@ -356,10 +411,35 @@ def evaluate(user_input, conversation_history = [],re_evaluate=False, agent_acti
tts(conversation_history[-1]["content"])
return conversation_history
### Condense (summarize) a string before feeding it into the LLM
def post_process(string):
messages = [
{
"role": "user",
"content": f"""Summarize the following text, keeping the relevant information:
```
{string}
```
""",
}
]
logger.info("==> Post processing: {string}", string=string)
# get the response from the model
response = openai.ChatCompletion.create(
model=LLM_MODEL,
messages=messages,
stop=None,
temperature=0.1,
request_timeout=1200,
)
result = response["choices"][0]["message"]["content"]
logger.info("==> Processed: {string}", string=result)
return result
### Agent capabilities
### These functions are called by the agent to perform actions
###
def save(memory, agent_actions={}):
q = json.loads(memory)
logger.info(">>> saving to memories: ")
@@ -379,12 +459,16 @@ def search(query, agent_actions={}):
def calculate_plan(user_input, agent_actions={}):
res = json.loads(user_input)
logger.info("--> Calculating plan: {description}", description=res["description"])
descriptions=action_description("",agent_actions)
messages = [
{"role": "user",
"content": f"""Transcript of AI assistant responding to user requests.
Replies with a plan to achieve the user's goal with a list of subtasks with logical steps. The reasoning includes a self-contained, detailed instruction to fulfill the task.
{descriptions}
Request: {res["description"]}
The assistant replies with a plan to answer the request with a list of subtasks with logical steps. The reasoning includes a self-contained, detailed and descriptive instruction to fulfill the task.
Function call: """
}
]
@@ -512,8 +596,10 @@ def search_duckduckgo(args, agent_actions={}):
return l
### End Agent capabilities
###
### Agent action definitions
agent_actions = {
"search_internet": {
"function": search_duckduckgo,
@@ -555,12 +641,12 @@ agent_actions = {
}
},
},
"remember": {
"save_memory": {
"function": save,
"plannable": True,
"description": 'The assistant replies with the action "remember" and the string to save in order to remember something or save an information that thinks it is relevant permanently.',
"description": 'The assistant replies with the action "save_memory" and the string to remember or store an information that thinks it is relevant permanently.',
"signature": {
"name": "remember",
"name": "save_memory",
"description": """Save or store informations into memory.""",
"parameters": {
"type": "object",
@@ -574,12 +660,12 @@ agent_actions = {
}
},
},
"recall": {
"search_memory": {
"function": search,
"plannable": True,
"description": 'The assistant replies with the action "recall" for searching between its memories with a query term.',
"description": 'The assistant replies with the action "search_memory" for searching between its memories with a query term.',
"signature": {
"name": "recall",
"name": "search_memory",
"description": """Search in memory""",
"parameters": {
"type": "object",
@@ -622,23 +708,50 @@ agent_actions = {
conversation_history = []
# Set a system prompt if SYSTEM_PROMPT is set
if os.environ.get("SYSTEM_PROMPT"):
if os.environ.get("SYSTEM_PROMPT") or args.system_prompt:
sprompt = os.environ.get("SYSTEM_PROMPT", args.system_prompt)
conversation_history.append({
"role": "system",
"content": os.environ.get("SYSTEM_PROMPT")
"content": sprompt
})
logger.info("Welcome to microAGI")
logger.info("Creating avatar, please wait...")
logger.info("Welcome to μAGI")
display_avatar()
# Skip avatar creation if --skip-avatar is set
if not args.skip_avatar:
logger.info("Creating avatar, please wait...")
display_avatar()
logger.info("Welcome to microAGI")
logger.info("microAGI has the following actions available at its disposal:")
for action in agent_actions:
logger.info("{action} - {description}", action=action, description=agent_actions[action]["description"])
if not args.prompt:
logger.info("μAGI has the following actions available at its disposal:")
for action in agent_actions:
logger.info("{action} - {description}", action=action, description=agent_actions[action]["description"])
else:
logger.info(">>> Prompt mode <<<")
logger.info(args.prompt)
# TODO: process functions also considering the conversation history? conversation history + input
while True:
user_input = input("> ")
conversation_history=evaluate(user_input, conversation_history, re_evaluate=True, agent_actions=agent_actions)
# IF in prompt mode just evaluate, otherwise loop
if args.prompt:
evaluate(
args.prompt,
conversation_history,
re_evaluate=args.re_evaluate,
agent_actions=agent_actions,
# Enabling these lowers context usage but increases LLM calls
postprocess=args.postprocess,
subtaskContext=args.subtaskContext,
)
else:
# TODO: process functions also considering the conversation history? conversation history + input
while True:
user_input = input("> ")
# we are going to use the args to change the evaluation behavior
conversation_history=evaluate(
user_input,
conversation_history,
re_evaluate=args.re_evaluate,
agent_actions=agent_actions,
# Enabling these lowers context usage but increases LLM calls
postprocess=args.postprocess,
subtaskContext=args.subtaskContext,
)