From 655e95508a2d559bec600ecd427d4c7518cb0f79 Mon Sep 17 00:00:00 2001
From: mudler
Date: Tue, 1 Aug 2023 22:47:31 +0200
Subject: [PATCH] Updates

---
 .env                                          |  22 +-
 README.md                                     |   8 +-
 ...rhot.yaml => gplatty-30b-superhot-8k.yaml} |   0
 ...{preload-models.yaml => wizardlm-13b.yaml} |   0
 ...load-models-30b.yaml => wizardlm-30b.yaml} |   7 +-
 main.py                                       | 205 ++++++++++++++----
 6 files changed, 185 insertions(+), 57 deletions(-)
 rename config/{preload-models-30b-superhot.yaml => gplatty-30b-superhot-8k.yaml} (100%)
 rename config/{preload-models.yaml => wizardlm-13b.yaml} (100%)
 rename config/{preload-models-30b.yaml => wizardlm-30b.yaml} (80%)

diff --git a/.env b/.env
index b78f3e5..6fccca3 100644
--- a/.env
+++ b/.env
@@ -1,16 +1,24 @@
+# Enable debug mode in the LocalAI API
 DEBUG=true
+
+# Where models are stored
 MODELS_PATH=/models
+
+# Galleries to use
 GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]
-# 30b setup
-PRELOAD_MODELS=[{"id":"huggingface@thebloke/gplatty-30b-ggml/gplatty-30b.ggmlv3.q2_k.bin","name":"gpt-4","overrides":{"context_size":4096,"mmap":true,"f16":true,"mirostat":2,"mirostat_tau":5,"mirostat_eta":0.1,"parameters":{"temperature":0.1,"top_k":40,"top_p":0.95}}},{"id":"model-gallery@stablediffusion"},{"id":"model-gallery@voice-en-us-kathleen-low"},{"url":"github:go-skynet/model-gallery/base.yaml","name":"all-MiniLM-L6-v2","overrides":{"embeddings":true,"backend":"huggingface-embeddings","parameters":{"model":"all-MiniLM-L6-v2"}}},{"id":"huggingface@thebloke/gplatty-30b-ggml/gplatty-30b.ggmlv3.q2_k.bin","name":"functions","overrides":{"context_size":4096,"mirostat":2,"mirostat_tau":5,"mirostat_eta":0.1,"template":{"chat":"","completion":""},"roles":{"assistant":"ASSISTANT:","system":"SYSTEM:","assistant_function_call":"FUNCTION_CALL:","function":"FUNCTION CALL RESULT:"},"parameters":{"temperature":0.1,"top_k":40,"top_p":0.95},"function":{"disable_no_action":true},"mmap":true,"f16":true}}]
-# 13b setup
-# PRELOAD_MODELS=[{"id":"huggingface@thebloke/wizardlm-13b-v1.0-uncensored-ggml/wizardlm-13b-v1.0-uncensored.ggmlv3.q4_k_m.bin","name":"gpt-4","overrides":{"context_size":2048,"mmap":true,"f16":true,"mirostat":2,"mirostat_tau":5,"mirostat_eta":0.1,"parameters":{"temperature":0.1,"top_k":40,"top_p":0.95}}},{"id":"model-gallery@stablediffusion"},{"id":"model-gallery@voice-en-us-kathleen-low"},{"url":"github:go-skynet/model-gallery/base.yaml","name":"all-MiniLM-L6-v2","overrides":{"embeddings":true,"backend":"huggingface-embeddings","parameters":{"model":"all-MiniLM-L6-v2"}}},{"id":"huggingface@thebloke/wizardlm-13b-v1.0-uncensored-ggml/wizardlm-13b-v1.0-uncensored.ggmlv3.q4_0.bin","name":"functions","overrides":{"context_size":2048,"mirostat":2,"mirostat_tau":5,"mirostat_eta":0.1,"template":{"chat":"","completion":""},"roles":{"assistant":"ASSISTANT:","system":"SYSTEM:","assistant_function_call":"FUNCTION_CALL:","function":"FUNCTION CALL RESULT:"},"parameters":{"temperature":0.1,"top_k":40,"top_p":0.95},"function":{"disable_no_action":true},"mmap":true,"f16":true}}]
+# Select model configuration in the config directory
+PRELOAD_MODELS_CONFIG=/config/wizardlm-13b.yaml
+
 
-#PRELOAD_MODELS_CONFIG=/config/preload-models.yaml
-# 30b superhot setup
-#PRELOAD_MODELS=[{"id":"huggingface@thebloke/gplatty-30b-superhot-8k-ggml/gplatty-30b-superhot-8k.ggmlv3.q2_k.bin","name":"gpt-4","overrides":{"context_size":8192,"mmap":true,"f16":true,"mirostat":2,"mirostat_tau":5,"mirostat_eta":0.1,"parameters":{"temperature":0.1,"top_k":40,"top_p":0.95,"rope_freq_scale":0.25}}},{"id":"model-gallery@stablediffusion"},{"id":"model-gallery@voice-en-us-kathleen-low"},{"url":"github:go-skynet/model-gallery/base.yaml","name":"all-MiniLM-L6-v2","overrides":{"embeddings":true,"backend":"huggingface-embeddings","parameters":{"model":"all-MiniLM-L6-v2"}}},{"id":"huggingface@thebloke/gplatty-30b-superhot-8k-ggml/gplatty-30b-superhot-8k.ggmlv3.q2_k.bin","name":"functions","overrides":{"context_size":8192,"mirostat":2,"mirostat_tau":5,"mirostat_eta":0.1,"template":{"chat":"","completion":""},"roles":{"assistant":"ASSISTANT:","system":"SYSTEM:","assistant_function_call":"FUNCTION_CALL:","function":"FUNCTION CALL RESULT:"},"parameters":{"temperature":0.1,"top_k":40,"top_p":0.95,"rope_freq_scale":0.25},"function":{"disable_no_action":true},"mmap":true,"f16":true}}]
+# You don't need to put a valid OpenAI key; however, the Python libraries expect
+# the string to be set, or they will panic
 OPENAI_API_KEY=sk---
+
+# Set the OpenAI API base URL to point to LocalAI
 OPENAI_API_BASE=http://api:8080
+
+# Set an image path
 IMAGE_PATH=/tmp
+
+# Set the default number of threads
 THREADS=14
\ No newline at end of file
diff --git a/README.md b/README.md
index 33b602d..52a5042 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ Note: this is a fun project, not a serious one. Be warned!
 
 It is a dead simple experiment to show how to tie the various LocalAI functionalities to create a virtual assistant that can do tasks. It is simple on purpose, trying to be minimalistic and easy to understand and customize.
 
-It is different from babyAGI or AutoGPT as it uses [OpenAI functions](https://openai.com/blog/function-calling-and-other-api-updates), but locally with [LocalAI](https://localai.io) (no API keys needed!)
+It is different from babyAGI or AutoGPT as it uses [OpenAI functions](https://openai.com/blog/function-calling-and-other-api-updates) - it is a from-scratch attempt built on purpose to run locally with [LocalAI](https://localai.io) (no API keys needed!) instead of relying on expensive cloud services.
 
 ## Quick start
@@ -33,6 +33,12 @@ Ask it to:
 -> and watch it engaging into dialogues with long-term memory
 - "I want you to act as a marketing and sales guy in a startup company. I want you to come up with a plan to support our new latest project, XXX, which is an open source project. you are free to come up with creative ideas to engage and attract new people to the project. The XXX project is XXX."
 
+### Caveats
+
+The "goodness" of the model has a big impact on how well μAGI works. Currently `13b` models are powerful enough to actually perform multi-step tasks or chain several actions. However, they are quite slow when running on CPU (no big surprise here).
+
+The context size is a limitation - the `config` directory contains examples to run with the superhot 8k context size, but the quality is not good enough to perform complex tasks.
+
 ### How it works?
 
 `microAGI` just does the minimal around LocalAI functions to create a virtual assistant that can do generic tasks. It works by an endless loop of `intent detection`, `function invocation`, `self-evaluation` and `reply generation` (if it decides to reply! :)).
 The agent is capable of planning complex tasks by invoking multiple functions, and remember things from the conversation.
diff --git a/config/preload-models-30b-superhot.yaml b/config/gplatty-30b-superhot-8k.yaml
similarity index 100%
rename from config/preload-models-30b-superhot.yaml
rename to config/gplatty-30b-superhot-8k.yaml
diff --git a/config/preload-models.yaml b/config/wizardlm-13b.yaml
similarity index 100%
rename from config/preload-models.yaml
rename to config/wizardlm-13b.yaml
diff --git a/config/preload-models-30b.yaml b/config/wizardlm-30b.yaml
similarity index 80%
rename from config/preload-models-30b.yaml
rename to config/wizardlm-30b.yaml
index ef4ca8e..1736a2e 100644
--- a/config/preload-models-30b.yaml
+++ b/config/wizardlm-30b.yaml
@@ -1,5 +1,6 @@
-- id: huggingface@thebloke/gplatty-30b-ggml/gplatty-30b.ggmlv3.q2_k.bin
-  name: "gpt-4"
+- id: huggingface@thebloke/wizardlm-30b-uncensored-ggml/wizardlm-30b-uncensored.ggmlv3.q2_k.bin
+  galleryModel:
+  name: "gpt-4"
   overrides:
     context_size: 4096
     mmap: true
@@ -20,7 +21,7 @@
       backend: huggingface-embeddings
       parameters:
         model: all-MiniLM-L6-v2
-- id: huggingface@thebloke/gplatty-30b-ggml/gplatty-30b.ggmlv3.q2_k.bin
+- id: huggingface@thebloke/wizardlm-30b-uncensored-ggml/wizardlm-30b-uncensored.ggmlv3.q2_k.bin
   name: functions
   overrides:
     context_size: 4096
diff --git a/main.py b/main.py
index 4cac828..2e5acc0 100644
--- a/main.py
+++ b/main.py
@@ -21,34 +21,42 @@ import os
 
 # Parse arguments such as system prompt and batch mode
 import argparse
-parser = argparse.ArgumentParser(description='microAGI')
+parser = argparse.ArgumentParser(description='μAGI')
 parser.add_argument('--system-prompt', dest='system_prompt', action='store',
                     help='System prompt to use')
-parser.add_argument('--batch-mode', dest='batch_mode', action='store_true', default=False,
-                    help='Batch mode')
+parser.add_argument('--prompt', dest='prompt', action='store', default=False,
+                    help='Prompt mode')
 # skip avatar creation
 parser.add_argument('--skip-avatar', dest='skip_avatar', action='store_true', default=False,
                     help='Skip avatar creation')
+# Reevaluate
+parser.add_argument('--re-evaluate', dest='re_evaluate', action='store_true', default=False,
+                    help='Reevaluate if another action is needed or we have completed the user request')
+# Postprocess
+parser.add_argument('--postprocess', dest='postprocess', action='store_true', default=False,
+                    help='Postprocess the reasoning')
+# Subtask context
+parser.add_argument('--subtask-context', dest='subtaskContext', action='store_true', default=False,
+                    help='Include context in subtasks')
 args = parser.parse_args()
 
 FUNCTIONS_MODEL = os.environ.get("FUNCTIONS_MODEL", "functions")
+EMBEDDINGS_MODEL = os.environ.get("EMBEDDINGS_MODEL", "all-MiniLM-L6-v2")
 LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4")
 VOICE_MODEL= os.environ.get("TTS_MODEL","en-us-kathleen-low.onnx")
 
 DEFAULT_SD_MODEL = os.environ.get("DEFAULT_SD_MODEL", "stablediffusion")
 DEFAULT_SD_PROMPT = os.environ.get("DEFAULT_SD_PROMPT", "floating hair, portrait, ((loli)), ((one girl)), cute face, hidden hands, asymmetrical bangs, beautiful detailed eyes, eye shadow, hair ornament, ribbons, bowties, buttons, pleated skirt, (((masterpiece))), ((best quality)), colorful|((part of the head)), ((((mutated hands and fingers)))), deformed, blurry, bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, poorly drawn hands, missing limb, blurry, floating limbs, disconnected limbs, malformed hands, blur, out of focus, long neck, long body, Octane renderer, lowres, bad anatomy, bad hands, text")
 
 PERSISTENT_DIR = os.environ.get("PERSISTENT_DIR", "/data")
 
+## Constants
 REPLY_ACTION = "reply"
-PLAN_ACTION = "plan"
-#embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
-embeddings = LocalAIEmbeddings(model="all-MiniLM-L6-v2")
+PLAN_ACTION = "multitask_action"
+embeddings = LocalAIEmbeddings(model=EMBEDDINGS_MODEL)
 chroma_client = Chroma(collection_name="memories", persist_directory="db", embedding_function=embeddings)
-
-
-# Function to create images with OpenAI
+# Function to create images with LocalAI
 def display_avatar(input_text=DEFAULT_SD_PROMPT, model=DEFAULT_SD_MODEL):
     response = openai.Image.create(
         prompt=input_text,
@@ -61,6 +69,7 @@ def display_avatar(input_text=DEFAULT_SD_PROMPT, model=DEFAULT_SD_MODEL):
     my_art = AsciiArt.from_url(image_url)
     my_art.to_terminal()
 
+# Function to create audio with LocalAI
 def tts(input_text, model=VOICE_MODEL):
     # strip newlines from text
     input_text = input_text.replace("\n", ".")
@@ -88,12 +97,11 @@ def tts(input_text, model=VOICE_MODEL):
     # remove the audio file
     os.remove(output_file_path)
 
+# Function to analyze the user input and pick the next action to do
 def needs_to_do_action(user_input,agent_actions={}):
     # Get the descriptions and the actions name (the keys)
-    descriptions=""
-    for action in agent_actions:
-        descriptions+=agent_actions[action]["description"]+"\n"
+    descriptions=action_description("", agent_actions)
 
     messages = [
             {"role": "user",
@@ -151,11 +159,35 @@ Function call: """
         return res
     return {"action": REPLY_ACTION}
 
+# This is used to collect the descriptions of the agent actions, used to populate the LLM prompt
+def action_description(action, agent_actions):
+    descriptions=""
+    # generate descriptions of actions that the agent can pick
+    for a in agent_actions:
+        if ( action != "" and action == a ) or (action == ""):
+            descriptions+=agent_actions[a]["description"]+"\n"
+    return descriptions
+
+## This function is called to ask the user if they agree on the action to take and execute it
+def ask_user_confirmation(action_name, action_parameters):
+    logger.info("==> Ask user confirmation")
+    logger.info("==> action_name: {action_name}", action_name=action_name)
+    logger.info("==> action_parameters: {action_parameters}", action_parameters=action_parameters)
+    # Ask via stdin
+    logger.info("==> Do you want to execute the action? (y/n)")
+    user_input = input()
+    if user_input == "y":
+        logger.info("==> Executing action")
+        return True
+    else:
+        logger.info("==> Skipping action")
+        return False
+
+### This function is used to process the functions given a user input.
+### It picks a function, executes it and returns the list of messages containing the result.
 def process_functions(user_input, action="", agent_actions={}):
-    descriptions=""
-    for a in agent_actions:
-        descriptions+=agent_actions[a]["description"]+"\n"
+    descriptions=action_description(action, agent_actions)
 
     messages = [
         #   {"role": "system", "content": "You are a helpful assistant."},
@@ -179,6 +211,7 @@ Function call: """
     logger.info("==> function parameters: ")
     logger.info(function_parameters)
     function_to_call = agent_actions[function_name]["function"]
+
     function_result = function_to_call(function_parameters, agent_actions=agent_actions)
     logger.info("==> function result: ")
     logger.info(function_result)
@@ -198,6 +231,7 @@ Function call: """
         )
     return messages, function_result
 
+### function_completion is used to autocomplete functions given a list of messages
 def function_completion(messages, action="", agent_actions={}):
     function_call = "auto"
     if action != "":
@@ -223,7 +257,8 @@ def function_completion(messages, action="", agent_actions={}):
 
     return response
 
-# Gets the content of each message in the history
+# Rework the content of each message in the history in a way that is understandable by the LLM
+# TODO: switch to templates (?)
 def process_history(conversation_history):
     messages = ""
     for message in conversation_history:
@@ -240,8 +275,10 @@ def process_history(conversation_history):
             messages+="Assistant message: "+message["content"]+"\n"
     return messages
 
-
-def evaluate(user_input, conversation_history = [],re_evaluate=False, agent_actions={},re_evaluation_in_progress=False):
+### Main evaluate function
+### This function evaluates the user input and the conversation history in a continuous loop.
+### It returns the conversation history with the latest response from the assistant.
+def evaluate(user_input, conversation_history = [],re_evaluate=False, agent_actions={},re_evaluation_in_progress=False, postprocess=False, subtaskContext=False):
 
     messages = [
         {
@@ -272,11 +309,14 @@ def evaluate(user_input, conversation_history = [],re_evaluate=False, agent_acti
         action = {"action": REPLY_ACTION}
 
     if action["action"] != REPLY_ACTION:
-        logger.info("==> microAGI wants to call '{action}'", action=action["action"])
+        logger.info("==> μAGI wants to call '{action}'", action=action["action"])
         logger.info("==> Reasoning '{reasoning}'", reasoning=action["reasoning"])
 
         if action["action"] == PLAN_ACTION:
             logger.info("==> It's a plan <==: ")
 
+        if postprocess:
+            action["reasoning"] = post_process(action["reasoning"])
+
         #function_completion_message = "Conversation history:\n"+old_history+"\n"+
         function_completion_message = "Request: "+user_input+"\nReasoning: "+action["reasoning"]
         responses, function_results = process_functions(function_completion_message, action=action["action"], agent_actions=agent_actions)
@@ -291,18 +331,33 @@
                 logger.info(subtask)
                 #ctr="Context: "+user_input+"\nThought: "+action["reasoning"]+ "\nRequest: "+subtask["reasoning"]
                 cr="Context: "+user_input+"\n"
-                if subtask_result != "":
+                #cr=""
+                if subtask_result != "" and subtaskContext:
                     # Include cumulative results of previous subtasks
                     # TODO: this grows context, maybe we should use a different approach or summarize
-                    cr+="Subtask results: "+subtask_result+"\n"
-                cr+="Request: "+subtask["reasoning"]
+                    if postprocess:
+                        cr+= "Subtask results: "+post_process(subtask_result)+"\n"
+                    else:
+                        cr+="Subtask results: "+subtask_result+"\n"
+
+                if postprocess:
+                    cr+= "Request: "+post_process(subtask["reasoning"])
+                else:
+                    cr+= "Request: "+subtask["reasoning"]
                 subtask_response, function_results = process_functions(cr, subtask["function"],agent_actions=agent_actions)
                 subtask_result+=process_history(subtask_response)
                 responses.extend(subtask_response)
             if re_evaluate:
                 ## Better output or this infinite loops..
                 logger.info("-> Re-evaluate if another action is needed")
-                responses = evaluate(user_input+"\n Conversation history: \n"+process_history(responses[1:]), responses, re_evaluate,agent_actions=agent_actions,re_evaluation_in_progress=True)
+                ## ? conversation history should go after the user_input maybe?
+                re_eval = user_input +"\n"
+                re_eval += "Conversation history: \n"
+                if postprocess:
+                    re_eval+= post_process(process_history(responses[1:])) +"\n"
+                else:
+                    re_eval+= process_history(responses[1:]) +"\n"
+                responses = evaluate(re_eval, responses, re_evaluate,agent_actions=agent_actions,re_evaluation_in_progress=True)
 
             if re_evaluation_in_progress:
                 conversation_history.extend(responses)
@@ -337,8 +392,8 @@
         logger.info("==> no action needed")
 
         if re_evaluation_in_progress:
-            logger.info("==> microAGI has completed the user request")
-            logger.info("==> microAGI will reply to the user")
+            logger.info("==> μAGI has completed the user request")
+            logger.info("==> μAGI will reply to the user")
             return conversation_history
 
         # get the response from the model
@@ -356,10 +411,35 @@
         tts(conversation_history[-1]["content"])
 
     return conversation_history
 
+### Fine-tune a string before feeding it into the LLM
+def post_process(string):
+    messages = [
+        {
+        "role": "user",
+        "content": f"""Summarize the following text, keeping the relevant information:
+```
+{string}
+```
+""",
+        }
+    ]
+    logger.info("==> Post processing: {string}", string=string)
+    # get the response from the model
+    response = openai.ChatCompletion.create(
+        model=LLM_MODEL,
+        messages=messages,
+        stop=None,
+        temperature=0.1,
+        request_timeout=1200,
+    )
+    result = response["choices"][0]["message"]["content"]
+    logger.info("==> Processed: {string}", string=result)
+    return result
 
 ### Agent capabilities
-
+### These functions are called by the agent to perform actions
+###
 def save(memory, agent_actions={}):
     q = json.loads(memory)
     logger.info(">>> saving to memories: ")
@@ -379,12 +459,16 @@ def search(query, agent_actions={}):
 def calculate_plan(user_input, agent_actions={}):
     res = json.loads(user_input)
     logger.info("--> Calculating plan: {description}", description=res["description"])
+    descriptions=action_description("",agent_actions)
 
     messages = [
             {"role": "user",
   "content": f"""Transcript of AI assistant responding to user requests.
-Replies with a plan to achieve the user's goal with a list of subtasks with logical steps. The reasoning includes a self-contained, detailed instruction to fullfill the task.
+{descriptions}
 Request: {res["description"]}
+
+The assistant replies with a plan to answer the request with a list of subtasks with logical steps. The reasoning includes a self-contained, detailed and descriptive instruction to fulfill the task.
+
 Function call: """
 }
     ]
@@ -512,8 +596,10 @@ def search_duckduckgo(args, agent_actions={}):
     return l
 
 ### End Agent capabilities
+
+###
+### Agent action definitions
 agent_actions = {
     "search_internet": {
         "function": search_duckduckgo,
@@ -555,12 +641,12 @@ agent_actions = {
             }
         },
     },
-    "remember": {
+    "save_memory": {
         "function": save,
         "plannable": True,
-        "description": 'The assistant replies with the action "remember" and the string to save in order to remember something or save an information that thinks it is relevant permanently.',
+        "description": 'The assistant replies with the action "save_memory" and the string to remember or store information that it thinks is relevant permanently.',
         "signature": {
-            "name": "remember",
+            "name": "save_memory",
            "description": """Save or store informations into memory.""",
            "parameters": {
                "type": "object",
@@ -574,12 +660,12 @@ agent_actions = {
            }
        },
    },
-    "recall": {
+    "search_memory": {
        "function": search,
        "plannable": True,
-        "description": 'The assistant replies with the action "recall" for searching between its memories with a query term.',
+        "description": 'The assistant replies with the action "search_memory" for searching between its memories with a query term.',
        "signature": {
-            "name": "recall",
+            "name": "search_memory",
            "description": """Search in memory""",
            "parameters": {
                "type": "object",
@@ -622,23 +708,50 @@ agent_actions = {
 
 conversation_history = []
 
 # Set a system prompt if SYSTEM_PROMPT is set
-if os.environ.get("SYSTEM_PROMPT"):
+if os.environ.get("SYSTEM_PROMPT") or args.system_prompt:
+    sprompt = os.environ.get("SYSTEM_PROMPT", args.system_prompt)
     conversation_history.append({
         "role": "system",
-        "content": os.environ.get("SYSTEM_PROMPT")
+        "content": sprompt
     })
 
-logger.info("Welcome to microAGI")
-logger.info("Creating avatar, please wait...")
+logger.info("Welcome to μAGI")
 
-display_avatar()
+# Skip avatar creation if --skip-avatar is set
+if not args.skip_avatar:
+    logger.info("Creating avatar, please wait...")
+    display_avatar()
 
-logger.info("Welcome to microAGI")
-logger.info("microAGI has the following actions available at its disposal:")
-for action in agent_actions:
-    logger.info("{action} - {description}", action=action, description=agent_actions[action]["description"])
+if not args.prompt:
+    logger.info("μAGI has the following actions available at its disposal:")
+    for action in agent_actions:
+        logger.info("{action} - {description}", action=action, description=agent_actions[action]["description"])
+else:
+    logger.info(">>> Prompt mode <<<")
+    logger.info(args.prompt)
 
-# TODO: process functions also considering the conversation history? conversation history + input
-while True:
-    user_input = input("> ")
-    conversation_history=evaluate(user_input, conversation_history, re_evaluate=True, agent_actions=agent_actions)
\ No newline at end of file
+# If in prompt mode, just evaluate once; otherwise loop
+if args.prompt:
+    evaluate(
+        args.prompt,
+        conversation_history,
+        re_evaluate=args.re_evaluate,
+        agent_actions=agent_actions,
+        # Enable to lower context usage but increases LLM calls
+        postprocess=args.postprocess,
+        subtaskContext=args.subtaskContext,
+    )
+else:
+    # TODO: process functions also considering the conversation history? conversation history + input
+    while True:
+        user_input = input("> ")
+        # we are going to use the args to change the evaluation behavior
+        conversation_history=evaluate(
+            user_input,
+            conversation_history,
+            re_evaluate=args.re_evaluate,
+            agent_actions=agent_actions,
+            # Enable to lower context usage but increases LLM calls
+            postprocess=args.postprocess,
+            subtaskContext=args.subtaskContext,
+        )
\ No newline at end of file
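
Usage sketch (hypothetical; not part of the patch above): the new main.py flags can be combined on the command line. The entrypoint name matches the patch, but how it is launched (bare Python versus the project's compose setup) is an assumption here.

    # One-shot prompt mode: evaluate a single request, re-evaluate after each action,
    # summarize intermediate reasoning (--postprocess) and include cumulative subtask
    # context (--subtask-context); --skip-avatar avoids the Stable Diffusion call.
    python main.py --skip-avatar --re-evaluate --postprocess --subtask-context \
        --prompt "Plan a release announcement for the XXX project"

    # Interactive mode (default when --prompt is not given): the same flags are
    # forwarded to every evaluate() call in the input loop.
    python main.py --re-evaluate --subtask-context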