From 8d35592024e0f534a9e1189677499af430b68c53 Mon Sep 17 00:00:00 2001
From: mudler <mudler@localai.io>
Date: Fri, 4 Aug 2023 18:45:16 +0200
Subject: [PATCH] Simplify, use reasoning instead of context in subtasks

---
 .env             |   2 +
 README.md        |  23 +++++-
 main.py          | 205 ++++++++++++++++++++---------------------------
 requirements.txt |   3 +-
 4 files changed, 111 insertions(+), 122 deletions(-)
diff --git a/.env b/.env
index 495f6bc..68c9dac 100644
--- a/.env
+++ b/.env
@@ -8,7 +8,9 @@ MODELS_PATH=/models
 GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]
 
 # Select model configuration in the config directory
+#PRELOAD_MODELS_CONFIG=/config/wizardlm-13b.yaml
 PRELOAD_MODELS_CONFIG=/config/wizardlm-13b.yaml
+#PRELOAD_MODELS_CONFIG=/config/wizardlm-13b-superhot.yaml
 
 # You don't need to put a valid OpenAI key, however, the python libraries expect
 # the string to be set or panics
diff --git a/README.md b/README.md
index 52a5042..85ab85c 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,17 @@
 <br>
 </h1>
 
-From the [LocalAI](https://localai.io) author, microAGI. 100% Local AI assistant.
+From the [LocalAI](https://localai.io) author, μAGI. 100% Local AI assistant.
+
+LocalAGI is AutoGPT that you can run locally.
+
+The goal is:
+- Keep it simple, hackable and easy to understand
+- If you can't run it locally, it is not AGI
+- No API keys needed
+- No cloud services needed
+- Small set of dependencies
+- Run with Docker
 
 Note: this is a fun project, not a serious one. Be warned!
 
@@ -41,7 +51,7 @@ The context size is a limitation - you can find in the `config` examples to run
 
 ### How it works?
 
-`microAGI` just does the minimal around LocalAI functions to create a virtual assistant that can do generic tasks. It works by an endless loop of `intent detection`, `function invocation`, `self-evaluation` and `reply generation` (if it decides to reply! :)). The agent is capable of planning complex tasks by invoking multiple functions, and remember things from the conversation.
+`μAGI` just does the minimal around LocalAI functions to create a virtual assistant that can do generic tasks. It works by an endless loop of `intent detection`, `function invocation`, `self-evaluation` and `reply generation` (if it decides to reply! :)). The agent is capable of planning complex tasks by invoking multiple functions, and remember things from the conversation.
 
 In a nutshell, it goes like this:
 
@@ -52,6 +62,15 @@ In a nutshell, it goes like this:
 
 Under the hood LocalAI converts functions to llama.cpp BNF grammars. While OpenAI fine-tuned a model to reply to functions, LocalAI constrains the LLM to follow grammars. This is a much more efficient way to do it, and it is also more flexible as you can define your own functions and grammars. For learning more about this, check out the [LocalAI documentation](https://localai.io/docs/llm) and my tweet that explains how it works under the hoods: https://twitter.com/mudler_it/status/1675524071457533953.
 
+### Agent functions
+
+The intention of this project is to keep the agent minimal, so can be built on top of it or forked. The agent is capable of doing the following functions:
+- remember something from the conversation
+- recall something from the conversation
+- search something from the internet
+- plan a complex task by invoking multiple functions
+- write files to disk
+
 ## Roadmap
 
 - [x] 100% Local, with Local AI. NO API KEYS NEEDED!
diff --git a/main.py b/main.py
index 85642b5..54b48dd 100644
--- a/main.py
+++ b/main.py
@@ -9,8 +9,8 @@ from contextlib import redirect_stdout
 from loguru import logger
 from ascii_magic import AsciiArt
 from duckduckgo_search import DDGS
-from bs4 import BeautifulSoup
 from typing import Dict, List, Optional
+import subprocess
 
 # these three lines swap the stdlib sqlite3 lib with the pysqlite3 package for chroma
 __import__('pysqlite3')
@@ -44,6 +44,14 @@ parser.add_argument('--postprocess', dest='postprocess', action='store_true', de
 parser.add_argument('--subtask-context', dest='subtaskContext', action='store_true', default=False,
                     help='Include context in subtasks')
 
+# Search results number
+parser.add_argument('--search-results', dest='search_results', type=int, action='store', default=2,
+                    help='Number of search results to return')
+# Plan message
+parser.add_argument('--plan-message', dest='plan_message', action='store', 
+                    help="What message to use during planning",
+)                   
+
 DEFAULT_PROMPT="floating hair, portrait, ((loli)), ((one girl)), cute face, hidden hands, asymmetrical bangs, beautiful detailed eyes, eye shadow, hair ornament, ribbons, bowties, buttons, pleated skirt, (((masterpiece))), ((best quality)), colorful|((part of the head)), ((((mutated hands and fingers)))), deformed, blurry, bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, poorly drawn hands, missing limb, blurry, floating limbs, disconnected limbs, malformed hands, blur, out of focus, long neck, long body, Octane renderer, lowres, bad anatomy, bad hands, text"
 DEFAULT_API_BASE = os.environ.get("DEFAULT_API_BASE", "http://api:8080")
 # TTS api base
@@ -175,10 +183,10 @@ Function call: """
                 "type": "string",
                 "description": "reasoning behind the intent"
             },
-            "observation": {
-                "type": "string",
-                "description": "reasoning behind the intent"
-            },
+            # "observation": {
+            #     "type": "string",
+            #     "description": "reasoning behind the intent"
+            # },
             "action": {
                 "type": "string",
                 "enum": list(agent_actions.keys()),
@@ -259,15 +267,11 @@ Function call: """
     if response_message.get("function_call"):
         function_name = response.choices[0].message["function_call"].name
         function_parameters = response.choices[0].message["function_call"].arguments
-        logger.info("==> function name: ")
-        logger.info(function_name)
-        logger.info("==> function parameters: ")
-        logger.info(function_parameters)
+        logger.info("==> function parameters: {function_parameters}",function_parameters=function_parameters)
         function_to_call = agent_actions[function_name]["function"]
 
         function_result = function_to_call(function_parameters, agent_actions=agent_actions)
-        logger.info("==> function result: ")
-        logger.info(function_result)
+        logger.info("==> function result: {function_result}", function_result=function_result)
         messages.append(
             {
                 "role": "assistant",
@@ -289,9 +293,7 @@ def function_completion(messages, action="", agent_actions={}):
     function_call = "auto"
     if action != "":
         function_call={"name": action}
-    logger.info("==> function_call: ")
-    logger.info(function_call)
-
+    logger.info("==> function name: {function_call}", function_call=function_call)
     # get the functions from the signatures of the agent actions, if exists
     functions = []
     for action in agent_actions:
@@ -329,16 +331,7 @@ def process_history(conversation_history):
             messages+="Assistant message: "+message["content"]+"\n"
     return messages
 
-def clarify(responses, prefix=True):
-    # TODO: this needs to be optimized
-    if prefix:
-        responses.append(
-            {
-                "role": "system",
-                "content": "Return an appropriate answer to the user given the context above."
-            }
-        ) 
-
+def converse(responses):
     response = openai.ChatCompletion.create(
         model=LLM_MODEL,
         messages=responses,
@@ -357,31 +350,29 @@ def clarify(responses, prefix=True):
 
 ### Fine tune a string before feeding into the LLM
 
-def analyze(responses, prefix=True):
-    # TODO: this needs to be optimized
-    if prefix:
-        responses.append(
-            {
-                "role": "system",
-                "content": "Analyze the above text highlighting the relevant information. If there are errors, suggest solutions to fix them."
-            }
-        ) 
+def analyze(responses, prefix="Analyze the following text highlighting the relevant information and identify a list of actions to take if there are any. If there are errors, suggest solutions to fix them"):
+    string = process_history(responses)
+    messages = [
+        {
+        "role": "user",
+        "content": f"""{prefix}:
 
+```
+{string}
+```
+""",
+        }
+    ]
+ 
     response = openai.ChatCompletion.create(
         model=LLM_MODEL,
-        messages=responses,
+        messages=messages,
         stop=None,
         api_base=LOCALAI_API_BASE+"/v1",
         request_timeout=1200,
         temperature=0.1,
     )
-    responses.append(
-        {
-            "role": "assistant",
-            "content": response.choices[0].message["content"],
-        }
-    )
-    return responses
+    return  response.choices[0].message["content"]
 
 def post_process(string):
     messages = [
@@ -420,18 +411,27 @@ def save(memory, agent_actions={}):
     chroma_client.persist()
     return f"The object was saved permanently to memory."
 
-def search(query, agent_actions={}):
+def search_memory(query, agent_actions={}):
     q = json.loads(query)
     docs = chroma_client.similarity_search(q["reasoning"])
     text_res="Memories found in the database:\n"
     for doc in docs:
         text_res+="- "+doc.page_content+"\n"
+
+    if args.postprocess:
+        return post_process(text_res)
     return text_res
 
-def calculate_plan(user_input, agent_actions={}):
+def generate_plan(user_input, agent_actions={}):
     res = json.loads(user_input)
     logger.info("--> Calculating plan: {description}", description=res["description"])
     descriptions=action_description("",agent_actions)
+
+    plan_message = "The assistant replies with a plan to answer the request with a list of subtasks with logical steps. The reasoning includes a self-contained, detailed and descriptive instruction to fullfill the task."
+    if args.plan_message:
+        plan_message = args.plan_message
+        # plan_message = "The assistant replies with a plan of 3 steps to answer the request with a list of subtasks with logical steps. The reasoning includes a self-contained, detailed and descriptive instruction to fullfill the task."
+
     messages = [
             {"role": "user",
              "content": f"""Transcript of AI assistant responding to user requests. 
@@ -439,7 +439,7 @@ def calculate_plan(user_input, agent_actions={}):
 
 Request: {res["description"]}
 
-The assistant replies with a plan to answer the request with a list of subtasks with logical steps. The reasoning includes a self-contained, detailed and descriptive instruction to fullfill the task.
+The assistant replies with a plan of 3 steps to answer the request with a list of subtasks with logical steps. The reasoning includes a self-contained, detailed and descriptive instruction to fullfill the task.
 
 Function call: """
              }
@@ -463,10 +463,6 @@ Function call: """
                     "items": {
                         "type": "object",
                         "properties": {
-                            "observation": {
-                                "type": "string",
-                                "description": "subtask list",
-                            },
                             "reasoning": {
                                 "type": "string",
                                 "description": "subtask list",
@@ -566,29 +562,19 @@ def ddg(query: str, num_results: int, backend: str = "api") -> List[Dict[str, st
     return formatted_results
 
 ## Search on duckduckgo
-def search_duckduckgo(args, agent_actions={}):
-    args = json.loads(args)
-    list=ddg(args["query"], 5)
+def search_duckduckgo(a, agent_actions={}):
+    a = json.loads(a)
+    list=ddg(a["query"], args.search_results)
 
-    text_res="Internet search results:\n"
+    text_res="Internet search results:\n"   
     for doc in list:
         text_res+=f"""- {doc["snippet"]}. Source: {doc["title"]} - {doc["link"]}\n"""
+
+    #if args.postprocess:
+    #    return post_process(text_res)
     return text_res
     #l = json.dumps(list)
     #return l
-## Action to browse an url and return only text in the body (stripping HTML tags)
-def browse_url(args, agent_actions={}):
-    args = json.loads(args)
-    url=args["url"]
-    logger.info("==> browsing url: {url}", url=url)
-    response = requests.get(url)
-    soup = BeautifulSoup(response.text, 'html.parser')
-    # kill all script and style elements
-    for script in soup(["script", "style"]):
-        script.extract()    # rip it out
-    text_res=f"Website {url}:\n"
-    text_res+=soup.get_text()
-    return text_res
 
 ### End Agent capabilities
 ###
@@ -617,13 +603,13 @@ def evaluate(user_input, conversation_history = [],re_evaluate=False, agent_acti
 
     #if re_evaluate and not re_evaluation_in_progress:
     #    observation = analyze(conversation_history, prefix=True)
-    #    action_picker_message+="\n\nObservation: "+observation[-1]["content"]
+    #    action_picker_message+="\n\Thought: "+observation[-1]["content"]
     if re_evaluation_in_progress:
-        observation = analyze(conversation_history, prefix=True)
+        observation = analyze(conversation_history)
         action_picker_message="Decide from the output below if we have to do another action:\n"
         action_picker_message+="```\n"+user_input+"\n```"
-        action_picker_message+="\n\nObservation: "+observation[-1]["content"]
-
+        action_picker_message+="\n\nObservation: "+observation
+        # if there is no action to do, we can just reply to the user with REPLY_ACTION
     try:
         action = needs_to_do_action(action_picker_message,agent_actions=agent_actions)
     except Exception as e:
@@ -633,14 +619,15 @@ def evaluate(user_input, conversation_history = [],re_evaluate=False, agent_acti
 
     if action["action"] != REPLY_ACTION:
         logger.info("==> LocalAGI wants to call '{action}'", action=action["action"])
-        logger.info("==> Observation '{reasoning}'", reasoning=action["observation"])
+        #logger.info("==> Observation '{reasoning}'", reasoning=action["observation"])
         logger.info("==> Reasoning '{reasoning}'", reasoning=action["reasoning"])
-        reasoning = action["observation"] + "\n" + action["reasoning"]
-        if action["action"] == PLAN_ACTION:
-            logger.info("==> It's a plan <==: ")
 
-        if postprocess:
-            reasoning = post_process(reasoning)
+        reasoning = action["reasoning"]
+        if action["action"] == PLAN_ACTION:
+            logger.info("==> LocalAGI wants to create a plan that involves more actions ")
+
+        #if postprocess:
+            #reasoning = post_process(reasoning)
         
         #function_completion_message = "Conversation history:\n"+old_history+"\n"+
         function_completion_message = "Request: "+user_input+"\nReasoning: "+reasoning
@@ -652,28 +639,28 @@ def evaluate(user_input, conversation_history = [],re_evaluate=False, agent_acti
             # cycle subtasks and execute functions
             subtask_result=""
             for subtask in function_results["subtasks"]:
-                logger.info("==> subtask: ")
-                logger.info(subtask)
                 #ctr="Context: "+user_input+"\nThought: "+action["reasoning"]+ "\nRequest: "+subtask["reasoning"]
-                cr="Context: "+user_input+"\n"
+                #cr="Context: "+user_input+"\n"
+                cr="Reasoning: "+action["reasoning"]+ "\n"
                 #cr=""
                 if subtask_result != "" and subtaskContext:
                     # Include cumulative results of previous subtasks
                     # TODO: this grows context, maybe we should use a different approach or summarize
-                    if postprocess:
-                        cr+= "Subtask results: "+post_process(subtask_result)+"\n"
-                    else:
-                        cr+="Subtask results: "+subtask_result+"\n"
-                subtask_reasoning = subtask["observation"] + "\n" + subtask["reasoning"]
+                    ##if postprocess:
+                    ##    cr+= "Subtask results: "+post_process(subtask_result)+"\n"
+                    ##else:
+                    cr+="Subtask results: "+subtask_result+"\n"
+                subtask_reasoning = subtask["reasoning"]
 
+                logger.info("==> subtask '{subtask}' ({reasoning})", subtask=subtask["function"], reasoning=subtask_reasoning)
                 if postprocess:
                     cr+= "Request: "+post_process(subtask_reasoning)
                 else:
                     cr+= "Request: "+subtask_reasoning
                 subtask_response, function_results = process_functions(cr, subtask["function"],agent_actions=agent_actions)
-                subtask_result+=process_history(subtask_response)
-                if postprocess:
-                    subtask_result=post_process(subtask_result)
+                subtask_result+=process_history(subtask_response[1:])
+                # if postprocess:
+                #    subtask_result=post_process(subtask_result)
                 responses.extend(subtask_response)
         if re_evaluate:
             ## Better output or this infinite loops..
@@ -694,10 +681,15 @@ def evaluate(user_input, conversation_history = [],re_evaluate=False, agent_acti
             return conversation_history       
 
         # TODO: this needs to be optimized
-        responses = clarify(responses, prefix=True)
+        responses = analyze(responses, prefix=f"Return an appropriate answer to the user input '{user_input}' given the context below and summarizing the actions taken\n")
 
         # add responses to conversation history by extending the list
-        conversation_history.extend(responses)
+        conversation_history.append(
+            {
+            "role": "assistant",
+            "content": responses,
+            }
+        )
         # logger.info the latest response from the conversation history
         logger.info(conversation_history[-1]["content"])
         tts(conversation_history[-1]["content"])
@@ -709,9 +701,8 @@ def evaluate(user_input, conversation_history = [],re_evaluate=False, agent_acti
             logger.info("==> LocalAGI will reply to the user")
             return conversation_history        
 
-        # TODO: this needs to be optimized
         # get the response from the model
-        response = clarify(conversation_history, prefix=False)
+        response = converse(conversation_history)
         
         # add the response to the conversation history by extending the list
         conversation_history.append(response)
@@ -782,31 +773,8 @@ agent_actions = {
             }
         },
     },
-    "browser": {
-        "function": browse_url,
-        "plannable": True,
-        "description": 'The assistant replies with the action "browser" to navigate links.',
-        "signature": {
-            "name": "browser",
-            "description": """Search in memory""",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "url": {
-                        "type": "string",
-                        "description": "reasoning behind the intent"
-                    },
-                    "reasoning": {
-                        "type": "string",
-                        "description": "reasoning behind the intent"
-                    },
-                },
-                "required": ["url"]
-            }
-        }, 
-    },
     "search_memory": {
-        "function": search,
+        "function": search_memory,
         "plannable": True,
         "description": 'The assistant replies with the action "search_memory" for searching between its memories with a query term.',
         "signature": {
@@ -825,7 +793,7 @@ agent_actions = {
         }, 
     },
     PLAN_ACTION: {
-        "function": calculate_plan,
+        "function": generate_plan,
         "plannable": False,
         "description": 'The assistant for solving complex tasks that involves calling more functions in sequence, replies with the action "'+PLAN_ACTION+'".',
         "signature": {
@@ -869,8 +837,8 @@ if not args.skip_avatar:
 if not args.prompt:
     actions = ""
     for action in agent_actions:
-        actions+=action+"\n"
-    logger.info("LocalAGI can do the following actions: {actions}", actions=actions)
+        actions+=" '"+action+"'"
+    logger.info("LocalAGI internally can do the following actions:{actions}", actions=actions)
 
 if not args.prompt:
     logger.info(">>> Interactive mode <<<")
@@ -891,8 +859,9 @@ if args.prompt:
         )
 else:
     # TODO: process functions also considering the conversation history? conversation history + input
+    logger.info(">>> Ready! What can I do for you? ( try with: plan a roadtrip to San Francisco ) <<<")
     while True:
-        user_input = input("> ")
+        user_input = input(">>> ")
         # we are going to use the args to change the evaluation behavior
         conversation_history=evaluate(
             user_input, 
diff --git a/requirements.txt b/requirements.txt
index 9a429bd..94ea8e8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,5 +5,4 @@ pysqlite3-binary
 requests
 ascii-magic
 loguru
-duckduckgo_search
-beautifulsoup4
\ No newline at end of file
+duckduckgo_search
\ No newline at end of file