Simplify, use reasoning instead of context in subtasks

mudler
2023-08-04 18:45:16 +02:00
parent f3cdec2176
commit 8d35592024
4 changed files with 111 additions and 122 deletions

.env

@@ -8,7 +8,9 @@ MODELS_PATH=/models
 GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]
 # Select model configuration in the config directory
+#PRELOAD_MODELS_CONFIG=/config/wizardlm-13b.yaml
 PRELOAD_MODELS_CONFIG=/config/wizardlm-13b.yaml
+#PRELOAD_MODELS_CONFIG=/config/wizardlm-13b-superhot.yaml
 # You don't need to put a valid OpenAI key, however, the python libraries expect
 # the string to be set or panics
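With the commented alternatives in place, switching the preloaded model becomes a one-line toggle. For example, selecting the 8k-context variant would leave the file looking like this (a sketch; it assumes the superhot config ships under /config as the comment suggests):

```
#PRELOAD_MODELS_CONFIG=/config/wizardlm-13b.yaml
PRELOAD_MODELS_CONFIG=/config/wizardlm-13b-superhot.yaml
```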

README.md

@@ -5,7 +5,17 @@
 <br>
 </h1>
-From the [LocalAI](https://localai.io) author, microAGI. 100% Local AI assistant.
+From the [LocalAI](https://localai.io) author, μAGI. 100% Local AI assistant.
+LocalAGI is AutoGPT that you can run locally.
+The goal is:
+- Keep it simple, hackable and easy to understand
+- If you can't run it locally, it is not AGI
+- No API keys needed
+- No cloud services needed
+- Small set of dependencies
+- Run with Docker
 Note: this is a fun project, not a serious one. Be warned!
@@ -41,7 +51,7 @@ The context size is a limitation - you can find in the `config` examples to run
 ### How it works?
-`microAGI` just does the minimal around LocalAI functions to create a virtual assistant that can do generic tasks. It works by an endless loop of `intent detection`, `function invocation`, `self-evaluation` and `reply generation` (if it decides to reply! :)). The agent is capable of planning complex tasks by invoking multiple functions, and remember things from the conversation.
+`μAGI` just does the minimal around LocalAI functions to create a virtual assistant that can do generic tasks. It works by an endless loop of `intent detection`, `function invocation`, `self-evaluation` and `reply generation` (if it decides to reply! :)). The agent is capable of planning complex tasks by invoking multiple functions, and remember things from the conversation.
 In a nutshell, it goes like this:
@@ -52,6 +62,15 @@ In a nutshell, it goes like this:
 Under the hood LocalAI converts functions to llama.cpp BNF grammars. While OpenAI fine-tuned a model to reply to functions, LocalAI constrains the LLM to follow grammars. This is a much more efficient way to do it, and it is also more flexible as you can define your own functions and grammars. For learning more about this, check out the [LocalAI documentation](https://localai.io/docs/llm) and my tweet that explains how it works under the hoods: https://twitter.com/mudler_it/status/1675524071457533953.
+### Agent functions
+The intention of this project is to keep the agent minimal, so can be built on top of it or forked. The agent is capable of doing the following functions:
+- remember something from the conversation
+- recall something from the conversation
+- search something from the internet
+- plan a complex task by invoking multiple functions
+- write files to disk
 ## Roadmap
 - [x] 100% Local, with Local AI. NO API KEYS NEEDED!
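The grammar-constrained function calling the README describes is driven through the stock OpenAI client pointed at a LocalAI endpoint, just as main.py does below. A minimal sketch — the endpoint, model name, and action enum are illustrative placeholders, not taken from this commit:

```python
import openai

openai.api_base = "http://localhost:8080/v1"  # a local LocalAI instance (placeholder)
openai.api_key = "sk-anything"                # no real key needed, just a non-empty string

# A function signature shaped like the agent's intent-detection schema.
functions = [{
    "name": "intent",
    "description": "Decide which action to take next",
    "parameters": {
        "type": "object",
        "properties": {
            "reasoning": {"type": "string"},
            "action": {"type": "string", "enum": ["search_internet", "reply"]},
        },
    },
}]

# LocalAI compiles the JSON schema into a llama.cpp BNF grammar, so the model's
# output is forced to be a valid function call rather than free-form text.
response = openai.ChatCompletion.create(
    model="wizardlm-13b",  # placeholder model name
    messages=[{"role": "user", "content": "What's the weather like in Rome?"}],
    functions=functions,
    function_call={"name": "intent"},
)
print(response.choices[0].message["function_call"].arguments)
```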

main.py

@@ -9,8 +9,8 @@ from contextlib import redirect_stdout
 from loguru import logger
 from ascii_magic import AsciiArt
 from duckduckgo_search import DDGS
-from bs4 import BeautifulSoup
 from typing import Dict, List, Optional
+import subprocess
 # these three lines swap the stdlib sqlite3 lib with the pysqlite3 package for chroma
 __import__('pysqlite3')
@@ -44,6 +44,14 @@ parser.add_argument('--postprocess', dest='postprocess', action='store_true', de
 parser.add_argument('--subtask-context', dest='subtaskContext', action='store_true', default=False,
                     help='Include context in subtasks')
+# Search results number
+parser.add_argument('--search-results', dest='search_results', type=int, action='store', default=2,
+                    help='Number of search results to return')
+# Plan message
+parser.add_argument('--plan-message', dest='plan_message', action='store',
+                    help="What message to use during planning",
+)
 DEFAULT_PROMPT="floating hair, portrait, ((loli)), ((one girl)), cute face, hidden hands, asymmetrical bangs, beautiful detailed eyes, eye shadow, hair ornament, ribbons, bowties, buttons, pleated skirt, (((masterpiece))), ((best quality)), colorful|((part of the head)), ((((mutated hands and fingers)))), deformed, blurry, bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, poorly drawn hands, missing limb, blurry, floating limbs, disconnected limbs, malformed hands, blur, out of focus, long neck, long body, Octane renderer, lowres, bad anatomy, bad hands, text"
 DEFAULT_API_BASE = os.environ.get("DEFAULT_API_BASE", "http://api:8080")
 # TTS api base
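The two new flags are consumed further down: search_results caps the DuckDuckGo hits in search_duckduckgo, and plan_message overrides the default planning prompt in generate_plan. A hypothetical invocation, assuming the script is launched directly rather than through the repo's Docker setup:

```
python main.py --search-results 5 --plan-message "The assistant replies with a plan of 2 steps."
```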
@@ -175,10 +183,10 @@ Function call: """
"type": "string", "type": "string",
"description": "reasoning behind the intent" "description": "reasoning behind the intent"
}, },
"observation": { # "observation": {
"type": "string", # "type": "string",
"description": "reasoning behind the intent" # "description": "reasoning behind the intent"
}, # },
"action": { "action": {
"type": "string", "type": "string",
"enum": list(agent_actions.keys()), "enum": list(agent_actions.keys()),
@@ -259,15 +267,11 @@ Function call: """
     if response_message.get("function_call"):
         function_name = response.choices[0].message["function_call"].name
         function_parameters = response.choices[0].message["function_call"].arguments
-        logger.info("==> function name: ")
-        logger.info(function_name)
-        logger.info("==> function parameters: ")
-        logger.info(function_parameters)
+        logger.info("==> function parameters: {function_parameters}",function_parameters=function_parameters)
         function_to_call = agent_actions[function_name]["function"]
         function_result = function_to_call(function_parameters, agent_actions=agent_actions)
-        logger.info("==> function result: ")
-        logger.info(function_result)
+        logger.info("==> function result: {function_result}", function_result=function_result)
         messages.append(
             {
                 "role": "assistant",
@@ -289,9 +293,7 @@ def function_completion(messages, action="", agent_actions={}):
function_call = "auto" function_call = "auto"
if action != "": if action != "":
function_call={"name": action} function_call={"name": action}
logger.info("==> function_call: ") logger.info("==> function name: {function_call}", function_call=function_call)
logger.info(function_call)
# get the functions from the signatures of the agent actions, if exists # get the functions from the signatures of the agent actions, if exists
functions = [] functions = []
for action in agent_actions: for action in agent_actions:
@@ -329,16 +331,7 @@ def process_history(conversation_history):
messages+="Assistant message: "+message["content"]+"\n" messages+="Assistant message: "+message["content"]+"\n"
return messages return messages
def clarify(responses, prefix=True): def converse(responses):
# TODO: this needs to be optimized
if prefix:
responses.append(
{
"role": "system",
"content": "Return an appropriate answer to the user given the context above."
}
)
response = openai.ChatCompletion.create( response = openai.ChatCompletion.create(
model=LLM_MODEL, model=LLM_MODEL,
messages=responses, messages=responses,
@@ -357,31 +350,29 @@ def clarify(responses, prefix=True):
 ### Fine tune a string before feeding into the LLM
-def analyze(responses, prefix=True):
-    # TODO: this needs to be optimized
-    if prefix:
-        responses.append(
-            {
-                "role": "system",
-                "content": "Analyze the above text highlighting the relevant information. If there are errors, suggest solutions to fix them."
-            }
-        )
+def analyze(responses, prefix="Analyze the following text highlighting the relevant information and identify a list of actions to take if there are any. If there are errors, suggest solutions to fix them"):
+    string = process_history(responses)
+    messages = [
+        {
+            "role": "user",
+            "content": f"""{prefix}:
+```
+{string}
+```
+""",
+        }
+    ]
     response = openai.ChatCompletion.create(
         model=LLM_MODEL,
-        messages=responses,
+        messages=messages,
         stop=None,
         api_base=LOCALAI_API_BASE+"/v1",
         request_timeout=1200,
         temperature=0.1,
     )
-    responses.append(
-        {
-            "role": "assistant",
-            "content": response.choices[0].message["content"],
-        }
-    )
-    return responses
+    return response.choices[0].message["content"]
 def post_process(string):
     messages = [
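After this rewrite, analyze no longer mutates the caller's message list: it flattens the history via process_history, quotes it under the prefix, and returns the model's text. A usage sketch with illustrative history:

```python
history = [
    {"role": "user", "content": "Plan a roadtrip to San Francisco"},
    {"role": "assistant", "content": "Searching for scenic routes..."},
]

# Default prefix: highlight relevant information and suggest actions/fixes.
observation = analyze(history)

# Custom prefix, as evaluate() now does when composing the final reply.
answer = analyze(history, prefix="Return an appropriate answer to the user given the context below")
```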
@@ -420,18 +411,27 @@ def save(memory, agent_actions={}):
     chroma_client.persist()
     return f"The object was saved permanently to memory."
-def search(query, agent_actions={}):
+def search_memory(query, agent_actions={}):
     q = json.loads(query)
     docs = chroma_client.similarity_search(q["reasoning"])
     text_res="Memories found in the database:\n"
     for doc in docs:
         text_res+="- "+doc.page_content+"\n"
-    if args.postprocess:
-        return post_process(text_res)
     return text_res
-def calculate_plan(user_input, agent_actions={}):
+def generate_plan(user_input, agent_actions={}):
     res = json.loads(user_input)
     logger.info("--> Calculating plan: {description}", description=res["description"])
     descriptions=action_description("",agent_actions)
+    plan_message = "The assistant replies with a plan to answer the request with a list of subtasks with logical steps. The reasoning includes a self-contained, detailed and descriptive instruction to fullfill the task."
+    if args.plan_message:
+        plan_message = args.plan_message
+    # plan_message = "The assistant replies with a plan of 3 steps to answer the request with a list of subtasks with logical steps. The reasoning includes a self-contained, detailed and descriptive instruction to fullfill the task."
     messages = [
         {"role": "user",
          "content": f"""Transcript of AI assistant responding to user requests.
@@ -439,7 +439,7 @@ def calculate_plan(user_input, agent_actions={}):
 Request: {res["description"]}
-The assistant replies with a plan to answer the request with a list of subtasks with logical steps. The reasoning includes a self-contained, detailed and descriptive instruction to fullfill the task.
+The assistant replies with a plan of 3 steps to answer the request with a list of subtasks with logical steps. The reasoning includes a self-contained, detailed and descriptive instruction to fullfill the task.
 Function call: """
     }
@@ -463,10 +463,6 @@ Function call: """
"items": { "items": {
"type": "object", "type": "object",
"properties": { "properties": {
"observation": {
"type": "string",
"description": "subtask list",
},
"reasoning": { "reasoning": {
"type": "string", "type": "string",
"description": "subtask list", "description": "subtask list",
@@ -566,29 +562,19 @@ def ddg(query: str, num_results: int, backend: str = "api") -> List[Dict[str, st
     return formatted_results
 ## Search on duckduckgo
-def search_duckduckgo(args, agent_actions={}):
-    args = json.loads(args)
-    list=ddg(args["query"], 5)
+def search_duckduckgo(a, agent_actions={}):
+    a = json.loads(a)
+    list=ddg(a["query"], args.search_results)
     text_res="Internet search results:\n"
     for doc in list:
         text_res+=f"""- {doc["snippet"]}. Source: {doc["title"]} - {doc["link"]}\n"""
+    #if args.postprocess:
+    #    return post_process(text_res)
     return text_res
     #l = json.dumps(list)
     #return l
-## Action to browse an url and return only text in the body (stripping HTML tags)
-def browse_url(args, agent_actions={}):
-    args = json.loads(args)
-    url=args["url"]
-    logger.info("==> browsing url: {url}", url=url)
-    response = requests.get(url)
-    soup = BeautifulSoup(response.text, 'html.parser')
-    # kill all script and style elements
-    for script in soup(["script", "style"]):
-        script.extract() # rip it out
-    text_res=f"Website {url}:\n"
-    text_res+=soup.get_text()
-    return text_res
 ### End Agent capabilities
 ###
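Note that the args → a rename in search_duckduckgo is not cosmetic: the old code rebound args to the JSON payload, shadowing the module-level argparse namespace, which is precisely what the new --search-results flag has to read. A sketch of the difference, using the names from the hunk above:

```python
# Old: the parameter shadows the global argparse namespace.
def search_duckduckgo(args, agent_actions={}):
    args = json.loads(args)          # 'args' is now the JSON payload (a dict)
    results = ddg(args["query"], 5)  # hard-coded 5; args.search_results is unreachable

# New: the payload gets its own name, so the CLI flag is visible again.
def search_duckduckgo(a, agent_actions={}):
    a = json.loads(a)
    results = ddg(a["query"], args.search_results)
```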
@@ -617,13 +603,13 @@ def evaluate(user_input, conversation_history = [],re_evaluate=False, agent_acti
     #if re_evaluate and not re_evaluation_in_progress:
     #    observation = analyze(conversation_history, prefix=True)
-    #    action_picker_message+="\n\nObservation: "+observation[-1]["content"]
+    #    action_picker_message+="\n\Thought: "+observation[-1]["content"]
     if re_evaluation_in_progress:
-        observation = analyze(conversation_history, prefix=True)
+        observation = analyze(conversation_history)
         action_picker_message="Decide from the output below if we have to do another action:\n"
         action_picker_message+="```\n"+user_input+"\n```"
-        action_picker_message+="\n\nObservation: "+observation[-1]["content"]
+        action_picker_message+="\n\nObservation: "+observation
+    # if there is no action to do, we can just reply to the user with REPLY_ACTION
     try:
         action = needs_to_do_action(action_picker_message,agent_actions=agent_actions)
     except Exception as e:
@@ -633,14 +619,15 @@ def evaluate(user_input, conversation_history = [],re_evaluate=False, agent_acti
if action["action"] != REPLY_ACTION: if action["action"] != REPLY_ACTION:
logger.info("==> LocalAGI wants to call '{action}'", action=action["action"]) logger.info("==> LocalAGI wants to call '{action}'", action=action["action"])
logger.info("==> Observation '{reasoning}'", reasoning=action["observation"]) #logger.info("==> Observation '{reasoning}'", reasoning=action["observation"])
logger.info("==> Reasoning '{reasoning}'", reasoning=action["reasoning"]) logger.info("==> Reasoning '{reasoning}'", reasoning=action["reasoning"])
reasoning = action["observation"] + "\n" + action["reasoning"]
if action["action"] == PLAN_ACTION:
logger.info("==> It's a plan <==: ")
if postprocess: reasoning = action["reasoning"]
reasoning = post_process(reasoning) if action["action"] == PLAN_ACTION:
logger.info("==> LocalAGI wants to create a plan that involves more actions ")
#if postprocess:
#reasoning = post_process(reasoning)
#function_completion_message = "Conversation history:\n"+old_history+"\n"+ #function_completion_message = "Conversation history:\n"+old_history+"\n"+
function_completion_message = "Request: "+user_input+"\nReasoning: "+reasoning function_completion_message = "Request: "+user_input+"\nReasoning: "+reasoning
@@ -652,28 +639,28 @@ def evaluate(user_input, conversation_history = [],re_evaluate=False, agent_acti
         # cycle subtasks and execute functions
         subtask_result=""
         for subtask in function_results["subtasks"]:
-            logger.info("==> subtask: ")
-            logger.info(subtask)
             #ctr="Context: "+user_input+"\nThought: "+action["reasoning"]+ "\nRequest: "+subtask["reasoning"]
-            cr="Context: "+user_input+"\n"
+            #cr="Context: "+user_input+"\n"
+            cr="Reasoning: "+action["reasoning"]+ "\n"
             #cr=""
             if subtask_result != "" and subtaskContext:
                 # Include cumulative results of previous subtasks
                 # TODO: this grows context, maybe we should use a different approach or summarize
-                if postprocess:
-                    cr+= "Subtask results: "+post_process(subtask_result)+"\n"
-                else:
-                    cr+="Subtask results: "+subtask_result+"\n"
-            subtask_reasoning = subtask["observation"] + "\n" + subtask["reasoning"]
+                ##if postprocess:
+                ##    cr+= "Subtask results: "+post_process(subtask_result)+"\n"
+                ##else:
+                cr+="Subtask results: "+subtask_result+"\n"
+            subtask_reasoning = subtask["reasoning"]
+            logger.info("==> subtask '{subtask}' ({reasoning})", subtask=subtask["function"], reasoning=subtask_reasoning)
             if postprocess:
                 cr+= "Request: "+post_process(subtask_reasoning)
             else:
                 cr+= "Request: "+subtask_reasoning
             subtask_response, function_results = process_functions(cr, subtask["function"],agent_actions=agent_actions)
-            subtask_result+=process_history(subtask_response)
-            if postprocess:
-                subtask_result=post_process(subtask_result)
+            subtask_result+=process_history(subtask_response[1:])
+            # if postprocess:
+            #    subtask_result=post_process(subtask_result)
             responses.extend(subtask_response)
         if re_evaluate:
             ## Better output or this infinite loops..
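Per the commit title, each subtask request is now seeded with the plan's reasoning instead of the raw user input. Assembled by hand, the cr string handed to process_functions for a later subtask would look roughly like this (values illustrative):

```python
cr = (
    "Reasoning: Plan a two-step roadtrip to San Francisco\n"     # action["reasoning"]
    "Subtask results: Assistant message: found scenic routes\n"  # only with --subtask-context
    "Request: Save the chosen route for later recall"            # this subtask's reasoning
)
```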
@@ -694,10 +681,15 @@ def evaluate(user_input, conversation_history = [],re_evaluate=False, agent_acti
             return conversation_history
     # TODO: this needs to be optimized
-    responses = clarify(responses, prefix=True)
+    responses = analyze(responses, prefix=f"Return an appropriate answer to the user input '{user_input}' given the context below and summarizing the actions taken\n")
     # add responses to conversation history by extending the list
-    conversation_history.extend(responses)
+    conversation_history.append(
+        {
+            "role": "assistant",
+            "content": responses,
+        }
+    )
     # logger.info the latest response from the conversation history
     logger.info(conversation_history[-1]["content"])
     tts(conversation_history[-1]["content"])
@@ -709,9 +701,8 @@ def evaluate(user_input, conversation_history = [],re_evaluate=False, agent_acti
logger.info("==> LocalAGI will reply to the user") logger.info("==> LocalAGI will reply to the user")
return conversation_history return conversation_history
# TODO: this needs to be optimized
# get the response from the model # get the response from the model
response = clarify(conversation_history, prefix=False) response = converse(conversation_history)
# add the response to the conversation history by extending the list # add the response to the conversation history by extending the list
conversation_history.append(response) conversation_history.append(response)
@@ -782,31 +773,8 @@ agent_actions = {
             }
         },
     },
-    "browser": {
-        "function": browse_url,
-        "plannable": True,
-        "description": 'The assistant replies with the action "browser" to navigate links.',
-        "signature": {
-            "name": "browser",
-            "description": """Search in memory""",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "url": {
-                        "type": "string",
-                        "description": "reasoning behind the intent"
-                    },
-                    "reasoning": {
-                        "type": "string",
-                        "description": "reasoning behind the intent"
-                    },
-                },
-                "required": ["url"]
-            }
-        },
-    },
     "search_memory": {
-        "function": search,
+        "function": search_memory,
         "plannable": True,
         "description": 'The assistant replies with the action "search_memory" for searching between its memories with a query term.',
         "signature": {
@@ -825,7 +793,7 @@ agent_actions = {
         },
     },
     PLAN_ACTION: {
-        "function": calculate_plan,
+        "function": generate_plan,
         "plannable": False,
         "description": 'The assistant for solving complex tasks that involves calling more functions in sequence, replies with the action "'+PLAN_ACTION+'".',
         "signature": {
@@ -869,8 +837,8 @@ if not args.skip_avatar:
 if not args.prompt:
     actions = ""
     for action in agent_actions:
-        actions+=action+"\n"
-    logger.info("LocalAGI can do the following actions: {actions}", actions=actions)
+        actions+=" '"+action+"'"
+    logger.info("LocalAGI internally can do the following actions:{actions}", actions=actions)
 if not args.prompt:
     logger.info(">>> Interactive mode <<<")
@@ -891,8 +859,9 @@ if args.prompt:
     )
 else:
     # TODO: process functions also considering the conversation history? conversation history + input
+    logger.info(">>> Ready! What can I do for you? ( try with: plan a roadtrip to San Francisco ) <<<")
     while True:
-        user_input = input("> ")
+        user_input = input(">>> ")
         # we are going to use the args to change the evaluation behavior
         conversation_history=evaluate(
             user_input,

requirements.txt

@@ -5,5 +5,4 @@ pysqlite3-binary
 requests
 ascii-magic
 loguru
 duckduckgo_search
-beautifulsoup4