move logic to a class that can be imported
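This moves the agent logic out of main.py and into a LocalAGI class shipped as an importable package under src/localagi; main.py keeps only the CLI wiring. As a rough sketch of what the importable class enables, the example below drives LocalAGI from another script. The "echo" action, the http://localhost:8080 endpoint and the dummy API key are assumptions made for illustration; the constructor arguments and the evaluate/tts_play calls mirror the ones used in main.py further down.

import json
import openai

from localagi import LocalAGI

# LocalAI ignores the key, but the openai client generally wants one set.
openai.api_key = "sk-unused"

# Agent actions are plain callables; LocalAGI passes them the JSON-encoded
# arguments, the action registry and the LocalAGI instance itself.
def echo(arg, agent_actions={}, localagi=None):
    arg = json.loads(arg)
    return arg["text"]

agent_actions = {
    "echo": {
        "function": echo,
        "plannable": True,
        "description": 'To repeat text back to the user, the assistant replies with the action "echo".',
        "signature": {
            "name": "echo",
            "description": "Repeat a piece of text.",
            "parameters": {
                "type": "object",
                "properties": {"text": {"type": "string"}},
                "required": ["text"],
            },
        },
    },
}

localagi = LocalAGI(
    agent_actions=agent_actions,
    api_base="http://localhost:8080",  # assumed LocalAI endpoint
    llm_model="gpt-4",
    functions_model="functions",
)

conversation_history = localagi.evaluate("echo hello", [], re_evaluate=False)
localagi.tts_play(conversation_history[-1]["content"])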
Dockerfile
@@ -14,5 +14,5 @@ RUN apt-get update -y && \
     apt-get clean
 
 COPY . /app
+RUN pip install .
 ENTRYPOINT [ "python", "./main.py" ];
main.py
@@ -2,15 +2,14 @@ import openai
 #from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.embeddings import LocalAIEmbeddings
 import uuid
-import requests
-import ast
 import sys
-from contextlib import redirect_stdout
+from localagi import LocalAGI
 from loguru import logger
 from ascii_magic import AsciiArt
 from duckduckgo_search import DDGS
-from typing import Dict, List, Optional
-import subprocess
+from typing import Dict, List
+import os
 
 # these three lines swap the stdlib sqlite3 lib with the pysqlite3 package for chroma
 __import__('pysqlite3')
@@ -97,6 +96,24 @@ parser.add_argument('--debug', dest='debug', action='store_true', default=False,
 # Parse arguments
 args = parser.parse_args()
 
+STABLEDIFFUSION_MODEL = os.environ.get("STABLEDIFFUSION_MODEL", args.stablediffusion_model)
+STABLEDIFFUSION_PROMPT = os.environ.get("STABLEDIFFUSION_PROMPT", args.stablediffusion_prompt)
+FUNCTIONS_MODEL = os.environ.get("FUNCTIONS_MODEL", args.functions_model)
+EMBEDDINGS_MODEL = os.environ.get("EMBEDDINGS_MODEL", args.embeddings_model)
+LLM_MODEL = os.environ.get("LLM_MODEL", args.llm_model)
+VOICE_MODEL= os.environ.get("TTS_MODEL",args.tts_model)
+STABLEDIFFUSION_MODEL = os.environ.get("STABLEDIFFUSION_MODEL",args.stablediffusion_model)
+STABLEDIFFUSION_PROMPT = os.environ.get("STABLEDIFFUSION_PROMPT", args.stablediffusion_prompt)
+PERSISTENT_DIR = os.environ.get("PERSISTENT_DIR", "/data")
+SYSTEM_PROMPT = ""
+if os.environ.get("SYSTEM_PROMPT") or args.system_prompt:
+    SYSTEM_PROMPT = os.environ.get("SYSTEM_PROMPT", args.system_prompt)
+
+LOCALAI_API_BASE = args.localai_api_base
+TTS_API_BASE = args.tts_api_base
+IMAGE_API_BASE = args.images_api_base
+EMBEDDINGS_API_BASE = args.embeddings_api_base
+
 # Set log level
 LOG_LEVEL = "INFO"
 
@@ -134,125 +151,12 @@ embeddings = LocalAIEmbeddings(model=EMBEDDINGS_MODEL,openai_api_base=EMBEDDINGS
|
|||||||
chroma_client = Chroma(collection_name="memories", persist_directory="db", embedding_function=embeddings)
|
chroma_client = Chroma(collection_name="memories", persist_directory="db", embedding_function=embeddings)
|
||||||
|
|
||||||
# Function to create images with LocalAI
|
# Function to create images with LocalAI
|
||||||
def display_avatar(input_text=STABLEDIFFUSION_PROMPT, model=STABLEDIFFUSION_MODEL):
|
def display_avatar(agi, input_text=STABLEDIFFUSION_PROMPT, model=STABLEDIFFUSION_MODEL):
|
||||||
response = openai.Image.create(
|
image_url = agi.get_avatar(input_text, model)
|
||||||
prompt=input_text,
|
|
||||||
n=1,
|
|
||||||
size="128x128",
|
|
||||||
api_base=IMAGE_API_BASE+"/v1"
|
|
||||||
)
|
|
||||||
image_url = response['data'][0]['url']
|
|
||||||
# convert the image to ascii art
|
# convert the image to ascii art
|
||||||
my_art = AsciiArt.from_url(image_url)
|
my_art = AsciiArt.from_url(image_url)
|
||||||
my_art.to_terminal()
|
my_art.to_terminal()
|
||||||
|
|
||||||
# Function to create audio with LocalAI
|
|
||||||
def tts(input_text, model=VOICE_MODEL):
|
|
||||||
# strip newlines from text
|
|
||||||
input_text = input_text.replace("\n", ".")
|
|
||||||
# Create a temp file to store the audio output
|
|
||||||
output_file_path = '/tmp/output.wav'
|
|
||||||
# get from OPENAI_API_BASE env var
|
|
||||||
url = TTS_API_BASE + '/tts'
|
|
||||||
headers = {'Content-Type': 'application/json'}
|
|
||||||
data = {
|
|
||||||
"input": input_text,
|
|
||||||
"model": model
|
|
||||||
}
|
|
||||||
|
|
||||||
response = requests.post(url, headers=headers, data=json.dumps(data))
|
|
||||||
|
|
||||||
if response.status_code == 200:
|
|
||||||
with open(output_file_path, 'wb') as f:
|
|
||||||
f.write(response.content)
|
|
||||||
logger.info('Audio file saved successfully:', output_file_path)
|
|
||||||
else:
|
|
||||||
logger.info('Request failed with status code', response.status_code)
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Use aplay to play the audio
|
|
||||||
os.system('aplay ' + output_file_path)
|
|
||||||
# remove the audio file
|
|
||||||
os.remove(output_file_path)
|
|
||||||
except:
|
|
||||||
logger.info('Unable to play audio')
|
|
||||||
|
|
||||||
# Function to analyze the user input and pick the next action to do
|
|
||||||
def needs_to_do_action(user_input,agent_actions={}):
|
|
||||||
|
|
||||||
# Get the descriptions and the actions name (the keys)
|
|
||||||
descriptions=action_description("", agent_actions)
|
|
||||||
|
|
||||||
messages = [
|
|
||||||
{"role": "user",
|
|
||||||
"content": f"""Transcript of AI assistant responding to user requests. Replies with the action to perform and the reasoning.
|
|
||||||
{descriptions}"""},
|
|
||||||
{"role": "user",
|
|
||||||
"content": f"""{user_input}
|
|
||||||
Function call: """
|
|
||||||
}
|
|
||||||
]
|
|
||||||
functions = [
|
|
||||||
{
|
|
||||||
"name": "intent",
|
|
||||||
"description": """Decide to do an action.""",
|
|
||||||
"parameters": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"confidence": {
|
|
||||||
"type": "number",
|
|
||||||
"description": "confidence of the action"
|
|
||||||
},
|
|
||||||
"reasoning": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "reasoning behind the intent"
|
|
||||||
},
|
|
||||||
# "observation": {
|
|
||||||
# "type": "string",
|
|
||||||
# "description": "reasoning behind the intent"
|
|
||||||
# },
|
|
||||||
"action": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": list(agent_actions.keys()),
|
|
||||||
"description": "user intent"
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"required": ["action"]
|
|
||||||
}
|
|
||||||
},
|
|
||||||
]
|
|
||||||
response = openai.ChatCompletion.create(
|
|
||||||
#model="gpt-3.5-turbo",
|
|
||||||
model=FUNCTIONS_MODEL,
|
|
||||||
messages=messages,
|
|
||||||
request_timeout=1200,
|
|
||||||
functions=functions,
|
|
||||||
api_base=LOCALAI_API_BASE+"/v1",
|
|
||||||
stop=None,
|
|
||||||
temperature=0.1,
|
|
||||||
#function_call="auto"
|
|
||||||
function_call={"name": "intent"},
|
|
||||||
)
|
|
||||||
response_message = response["choices"][0]["message"]
|
|
||||||
if response_message.get("function_call"):
|
|
||||||
function_name = response.choices[0].message["function_call"].name
|
|
||||||
function_parameters = response.choices[0].message["function_call"].arguments
|
|
||||||
# read the json from the string
|
|
||||||
res = json.loads(function_parameters)
|
|
||||||
logger.debug(">>> function name: "+function_name)
|
|
||||||
logger.debug(">>> function parameters: "+function_parameters)
|
|
||||||
return res
|
|
||||||
return {"action": REPLY_ACTION}
|
|
||||||
|
|
||||||
# This is used to collect the descriptions of the agent actions, used to populate the LLM prompt
|
|
||||||
def action_description(action, agent_actions):
|
|
||||||
descriptions=""
|
|
||||||
# generate descriptions of actions that the agent can pick
|
|
||||||
for a in agent_actions:
|
|
||||||
if ( action != "" and action == a ) or (action == ""):
|
|
||||||
descriptions+=agent_actions[a]["description"]+"\n"
|
|
||||||
return descriptions
|
|
||||||
|
|
||||||
## This function is called to ask the user if does agree on the action to take and execute
|
## This function is called to ask the user if does agree on the action to take and execute
|
||||||
def ask_user_confirmation(action_name, action_parameters):
|
def ask_user_confirmation(action_name, action_parameters):
|
||||||
logger.info("==> Ask user confirmation")
|
logger.info("==> Ask user confirmation")
|
||||||
@@ -268,186 +172,11 @@ def ask_user_confirmation(action_name, action_parameters):
|
|||||||
logger.info("==> Skipping action")
|
logger.info("==> Skipping action")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
### This function is used to process the functions given a user input.
|
|
||||||
### It picks a function, executes it and returns the list of messages containing the result.
|
|
||||||
def process_functions(user_input, action="", agent_actions={}):
|
|
||||||
|
|
||||||
descriptions=action_description(action, agent_actions)
|
|
||||||
|
|
||||||
messages = [
|
|
||||||
# {"role": "system", "content": "You are a helpful assistant."},
|
|
||||||
{"role": "user",
|
|
||||||
"content": f"""Transcript of AI assistant responding to user requests. Replies with the action to perform, including reasoning, and the confidence interval from 0 to 100.
|
|
||||||
{descriptions}"""},
|
|
||||||
{"role": "user",
|
|
||||||
"content": f"""{user_input}
|
|
||||||
Function call: """
|
|
||||||
}
|
|
||||||
]
|
|
||||||
response = function_completion(messages, action=action,agent_actions=agent_actions)
|
|
||||||
response_message = response["choices"][0]["message"]
|
|
||||||
response_result = ""
|
|
||||||
function_result = {}
|
|
||||||
if response_message.get("function_call"):
|
|
||||||
function_name = response.choices[0].message["function_call"].name
|
|
||||||
function_parameters = response.choices[0].message["function_call"].arguments
|
|
||||||
logger.debug("==> function parameters: {function_parameters}",function_parameters=function_parameters)
|
|
||||||
function_to_call = agent_actions[function_name]["function"]
|
|
||||||
|
|
||||||
function_result = function_to_call(function_parameters, agent_actions=agent_actions)
|
|
||||||
logger.info("==> function result: {function_result}", function_result=function_result)
|
|
||||||
messages.append(
|
|
||||||
{
|
|
||||||
"role": "assistant",
|
|
||||||
"content": None,
|
|
||||||
"function_call": {"name": function_name, "arguments": function_parameters,},
|
|
||||||
}
|
|
||||||
)
|
|
||||||
messages.append(
|
|
||||||
{
|
|
||||||
"role": "function",
|
|
||||||
"name": function_name,
|
|
||||||
"content": str(function_result)
|
|
||||||
}
|
|
||||||
)
|
|
||||||
return messages, function_result
|
|
||||||
|
|
||||||
### function_completion is used to autocomplete functions given a list of messages
|
|
||||||
def function_completion(messages, action="", agent_actions={}):
|
|
||||||
function_call = "auto"
|
|
||||||
if action != "":
|
|
||||||
function_call={"name": action}
|
|
||||||
logger.debug("==> function name: {function_call}", function_call=function_call)
|
|
||||||
# get the functions from the signatures of the agent actions, if exists
|
|
||||||
functions = []
|
|
||||||
for action in agent_actions:
|
|
||||||
if agent_actions[action].get("signature"):
|
|
||||||
functions.append(agent_actions[action]["signature"])
|
|
||||||
response = openai.ChatCompletion.create(
|
|
||||||
#model="gpt-3.5-turbo",
|
|
||||||
model=FUNCTIONS_MODEL,
|
|
||||||
messages=messages,
|
|
||||||
functions=functions,
|
|
||||||
request_timeout=1200,
|
|
||||||
stop=None,
|
|
||||||
api_base=LOCALAI_API_BASE+"/v1",
|
|
||||||
temperature=0.1,
|
|
||||||
function_call=function_call
|
|
||||||
)
|
|
||||||
|
|
||||||
return response
|
|
||||||
|
|
||||||
# Rework the content of each message in the history in a way that is understandable by the LLM
|
|
||||||
# TODO: switch to templates (?)
|
|
||||||
def process_history(conversation_history):
|
|
||||||
messages = ""
|
|
||||||
for message in conversation_history:
|
|
||||||
# if there is content append it
|
|
||||||
if message.get("content") and message["role"] == "function":
|
|
||||||
messages+="Function result: \n" + message["content"]+"\n"
|
|
||||||
elif message.get("function_call"):
|
|
||||||
# encode message["function_call" to json and appends it
|
|
||||||
fcall = json.dumps(message["function_call"])
|
|
||||||
parameters = "calling " + message["function_call"]["name"]+" with arguments:"
|
|
||||||
args=json.loads(message["function_call"]["arguments"])
|
|
||||||
for arg in args:
|
|
||||||
logger.debug(arg)
|
|
||||||
logger.debug(args)
|
|
||||||
v=args[arg]
|
|
||||||
parameters+=f""" {arg}=\"{v}\""""
|
|
||||||
messages+= parameters+"\n"
|
|
||||||
elif message.get("content") and message["role"] == "user":
|
|
||||||
messages+=message["content"]+"\n"
|
|
||||||
elif message.get("content") and message["role"] == "assistant":
|
|
||||||
messages+="Assistant message: "+message["content"]+"\n"
|
|
||||||
return messages
|
|
||||||
|
|
||||||
def converse(responses):
|
|
||||||
response = openai.ChatCompletion.create(
|
|
||||||
model=LLM_MODEL,
|
|
||||||
messages=responses,
|
|
||||||
stop=None,
|
|
||||||
api_base=LOCALAI_API_BASE+"/v1",
|
|
||||||
request_timeout=1200,
|
|
||||||
temperature=0.1,
|
|
||||||
)
|
|
||||||
responses.append(
|
|
||||||
{
|
|
||||||
"role": "assistant",
|
|
||||||
"content": response.choices[0].message["content"],
|
|
||||||
}
|
|
||||||
)
|
|
||||||
return responses
|
|
||||||
|
|
||||||
### Fine tune a string before feeding into the LLM
|
|
||||||
|
|
||||||
def analyze(responses, prefix="Analyze the following text highlighting the relevant information and identify a list of actions to take if there are any. If there are errors, suggest solutions to fix them", suffix=""):
|
|
||||||
string = process_history(responses)
|
|
||||||
messages = []
|
|
||||||
|
|
||||||
if prefix != "":
|
|
||||||
messages = [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": f"""{prefix}:
|
|
||||||
|
|
||||||
```
|
|
||||||
{string}
|
|
||||||
```
|
|
||||||
""",
|
|
||||||
}
|
|
||||||
]
|
|
||||||
else:
|
|
||||||
messages = [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": f"""{string}""",
|
|
||||||
}
|
|
||||||
]
|
|
||||||
|
|
||||||
if suffix != "":
|
|
||||||
messages[0]["content"]+=f"""{suffix}"""
|
|
||||||
|
|
||||||
response = openai.ChatCompletion.create(
|
|
||||||
model=LLM_MODEL,
|
|
||||||
messages=messages,
|
|
||||||
stop=None,
|
|
||||||
api_base=LOCALAI_API_BASE+"/v1",
|
|
||||||
request_timeout=1200,
|
|
||||||
temperature=0.1,
|
|
||||||
)
|
|
||||||
return response.choices[0].message["content"]
|
|
||||||
|
|
||||||
def post_process(string):
|
|
||||||
messages = [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": f"""Summarize the following text, keeping the relevant information:
|
|
||||||
|
|
||||||
```
|
|
||||||
{string}
|
|
||||||
```
|
|
||||||
""",
|
|
||||||
}
|
|
||||||
]
|
|
||||||
logger.info("==> Post processing: {string}", string=string)
|
|
||||||
# get the response from the model
|
|
||||||
response = openai.ChatCompletion.create(
|
|
||||||
model=LLM_MODEL,
|
|
||||||
messages=messages,
|
|
||||||
api_base=LOCALAI_API_BASE+"/v1",
|
|
||||||
stop=None,
|
|
||||||
temperature=0.1,
|
|
||||||
request_timeout=1200,
|
|
||||||
)
|
|
||||||
result = response["choices"][0]["message"]["content"]
|
|
||||||
logger.info("==> Processed: {string}", string=result)
|
|
||||||
return result
|
|
||||||
|
|
||||||
### Agent capabilities
|
### Agent capabilities
|
||||||
### These functions are called by the agent to perform actions
|
### These functions are called by the agent to perform actions
|
||||||
###
|
###
|
||||||
def save(memory, agent_actions={}):
|
def save(memory, agent_actions={}, localagi=None):
|
||||||
q = json.loads(memory)
|
q = json.loads(memory)
|
||||||
logger.info(">>> saving to memories: ")
|
logger.info(">>> saving to memories: ")
|
||||||
logger.info(q["content"])
|
logger.info(q["content"])
|
||||||
@@ -455,7 +184,7 @@ def save(memory, agent_actions={}):
     chroma_client.persist()
     return f"The object was saved permanently to memory."
 
-def search_memory(query, agent_actions={}):
+def search_memory(query, agent_actions={}, localagi=None):
     q = json.loads(query)
     docs = chroma_client.similarity_search(q["reasoning"])
     text_res="Memories found in the database:\n"
@@ -465,87 +194,11 @@ def search_memory(query, agent_actions={}):
|
|||||||
#if args.postprocess:
|
#if args.postprocess:
|
||||||
# return post_process(text_res)
|
# return post_process(text_res)
|
||||||
#return text_res
|
#return text_res
|
||||||
return post_process(text_res)
|
return localagi.post_process(text_res)
|
||||||
|
|
||||||
def generate_plan(user_input, agent_actions={}):
|
|
||||||
res = json.loads(user_input)
|
|
||||||
logger.info("--> Calculating plan: {description}", description=res["description"])
|
|
||||||
descriptions=action_description("",agent_actions)
|
|
||||||
|
|
||||||
plan_message = "The assistant replies with a plan to answer the request with a list of subtasks with logical steps. The reasoning includes a self-contained, detailed and descriptive instruction to fullfill the task."
|
|
||||||
if args.plan_message:
|
|
||||||
plan_message = args.plan_message
|
|
||||||
# plan_message = "The assistant replies with a plan of 3 steps to answer the request with a list of subtasks with logical steps. The reasoning includes a self-contained, detailed and descriptive instruction to fullfill the task."
|
|
||||||
|
|
||||||
messages = [
|
|
||||||
{"role": "user",
|
|
||||||
"content": f"""Transcript of AI assistant responding to user requests.
|
|
||||||
{descriptions}
|
|
||||||
|
|
||||||
Request: {plan_message}
|
|
||||||
Thought: {res["description"]}
|
|
||||||
Function call: """
|
|
||||||
}
|
|
||||||
]
|
|
||||||
# get list of plannable actions
|
|
||||||
plannable_actions = []
|
|
||||||
for action in agent_actions:
|
|
||||||
if agent_actions[action]["plannable"]:
|
|
||||||
# append the key of the dict to plannable_actions
|
|
||||||
plannable_actions.append(action)
|
|
||||||
|
|
||||||
functions = [
|
|
||||||
{
|
|
||||||
"name": "plan",
|
|
||||||
"description": """Decide to do an action.""",
|
|
||||||
"parameters": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"subtasks": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"reasoning": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "subtask list",
|
|
||||||
},
|
|
||||||
"function": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": plannable_actions,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"required": ["subtasks"]
|
|
||||||
}
|
|
||||||
},
|
|
||||||
]
|
|
||||||
response = openai.ChatCompletion.create(
|
|
||||||
#model="gpt-3.5-turbo",
|
|
||||||
model=FUNCTIONS_MODEL,
|
|
||||||
messages=messages,
|
|
||||||
functions=functions,
|
|
||||||
api_base=LOCALAI_API_BASE+"/v1",
|
|
||||||
stop=None,
|
|
||||||
temperature=0.1,
|
|
||||||
#function_call="auto"
|
|
||||||
function_call={"name": "plan"},
|
|
||||||
)
|
|
||||||
response_message = response["choices"][0]["message"]
|
|
||||||
if response_message.get("function_call"):
|
|
||||||
function_name = response.choices[0].message["function_call"].name
|
|
||||||
function_parameters = response.choices[0].message["function_call"].arguments
|
|
||||||
# read the json from the string
|
|
||||||
res = json.loads(function_parameters)
|
|
||||||
logger.debug("<<< function name: {function_name} >>>> parameters: {parameters}", function_name=function_name,parameters=function_parameters)
|
|
||||||
return res
|
|
||||||
return {"action": REPLY_ACTION}
|
|
||||||
|
|
||||||
|
|
||||||
# write file to disk with content
|
# write file to disk with content
|
||||||
def write_file(arg, agent_actions={}):
|
def write_file(arg, agent_actions={}, localagi=None):
|
||||||
arg = json.loads(arg)
|
arg = json.loads(arg)
|
||||||
filename = arg["filename"]
|
filename = arg["filename"]
|
||||||
content = arg["content"]
|
content = arg["content"]
|
||||||
@@ -605,7 +258,7 @@ def ddg(query: str, num_results: int, backend: str = "api") -> List[Dict[str, st
     return formatted_results
 
 ## Search on duckduckgo
-def search_duckduckgo(a, agent_actions={}):
+def search_duckduckgo(a, agent_actions={}, localagi=None):
     a = json.loads(a)
     list=ddg(a["query"], args.search_results)
 
@@ -622,153 +275,6 @@ def search_duckduckgo(a, agent_actions={}):
|
|||||||
### End Agent capabilities
|
### End Agent capabilities
|
||||||
###
|
###
|
||||||
|
|
||||||
### Main evaluate function
|
|
||||||
### This function evaluates the user input and the conversation history.
|
|
||||||
### It returns the conversation history with the latest response from the assistant.
|
|
||||||
def evaluate(user_input, conversation_history = [],re_evaluate=False, agent_actions={},re_evaluation_in_progress=False, postprocess=False, subtaskContext=False, processed_messages=0):
|
|
||||||
|
|
||||||
messages = [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": user_input,
|
|
||||||
}
|
|
||||||
]
|
|
||||||
|
|
||||||
conversation_history.extend(messages)
|
|
||||||
|
|
||||||
# pulling the old history make the context grow exponentially
|
|
||||||
# and most importantly it repeates the first message with the commands again and again.
|
|
||||||
# it needs a bit of cleanup and process the messages and piggyback more LocalAI functions templates
|
|
||||||
# old_history = process_history(conversation_history)
|
|
||||||
# action_picker_message = "Conversation history:\n"+old_history
|
|
||||||
# action_picker_message += "\n"
|
|
||||||
action_picker_message = "Request: "+user_input
|
|
||||||
|
|
||||||
picker_actions = agent_actions
|
|
||||||
if args.force_action:
|
|
||||||
aa = {}
|
|
||||||
aa[args.force_action] = agent_actions[args.force_action]
|
|
||||||
picker_actions = aa
|
|
||||||
logger.info("==> Forcing action to '{action}' as requested by the user", action=args.force_action)
|
|
||||||
|
|
||||||
#if re_evaluate and not re_evaluation_in_progress:
|
|
||||||
# observation = analyze(conversation_history, prefix=True)
|
|
||||||
# action_picker_message+="\n\Thought: "+observation[-1]["content"]
|
|
||||||
if re_evaluation_in_progress:
|
|
||||||
observation = analyze(conversation_history)
|
|
||||||
action_picker_message="Decide from the output below if we have to do another action:\n"
|
|
||||||
action_picker_message+="```\n"+user_input+"\n```"
|
|
||||||
action_picker_message+="\n\nObservation: "+observation
|
|
||||||
# if there is no action to do, we can just reply to the user with REPLY_ACTION
|
|
||||||
try:
|
|
||||||
action = needs_to_do_action(action_picker_message,agent_actions=picker_actions)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error("==> error: ")
|
|
||||||
logger.error(e)
|
|
||||||
action = {"action": REPLY_ACTION}
|
|
||||||
|
|
||||||
if action["action"] != REPLY_ACTION:
|
|
||||||
logger.info("==> LocalAGI wants to call '{action}'", action=action["action"])
|
|
||||||
#logger.info("==> Observation '{reasoning}'", reasoning=action["observation"])
|
|
||||||
logger.info("==> Reasoning '{reasoning}'", reasoning=action["reasoning"])
|
|
||||||
# Force executing a plan instead
|
|
||||||
|
|
||||||
reasoning = action["reasoning"]
|
|
||||||
if action["action"] == PLAN_ACTION:
|
|
||||||
logger.info("==> LocalAGI wants to create a plan that involves more actions ")
|
|
||||||
|
|
||||||
#if postprocess:
|
|
||||||
#reasoning = post_process(reasoning)
|
|
||||||
function_completion_message=""
|
|
||||||
if processed_messages > 0:
|
|
||||||
function_completion_message += process_history(conversation_history)+"\n"
|
|
||||||
function_completion_message += "Request: "+user_input+"\nReasoning: "+reasoning
|
|
||||||
responses, function_results = process_functions(function_completion_message, action=action["action"], agent_actions=agent_actions)
|
|
||||||
# if there are no subtasks, we can just reply,
|
|
||||||
# otherwise we execute the subtasks
|
|
||||||
# First we check if it's an object
|
|
||||||
if isinstance(function_results, dict) and function_results.get("subtasks") and len(function_results["subtasks"]) > 0:
|
|
||||||
# cycle subtasks and execute functions
|
|
||||||
subtask_result=""
|
|
||||||
for subtask in function_results["subtasks"]:
|
|
||||||
#ctr="Context: "+user_input+"\nThought: "+action["reasoning"]+ "\nRequest: "+subtask["reasoning"]
|
|
||||||
#cr="Request: "+user_input+"\n"
|
|
||||||
cr=""
|
|
||||||
if subtask_result != "" and subtaskContext:
|
|
||||||
# Include cumulative results of previous subtasks
|
|
||||||
# TODO: this grows context, maybe we should use a different approach or summarize
|
|
||||||
##if postprocess:
|
|
||||||
## cr+= "Subtask results: "+post_process(subtask_result)+"\n"
|
|
||||||
##else:
|
|
||||||
cr+="\n"+subtask_result+"\n"
|
|
||||||
subtask_reasoning = subtask["reasoning"]
|
|
||||||
cr+="Reasoning: "+action["reasoning"]+ "\n"
|
|
||||||
cr+="\nFunction to call:" +subtask["function"]+"\n"
|
|
||||||
logger.info("==> subtask '{subtask}' ({reasoning})", subtask=subtask["function"], reasoning=subtask_reasoning)
|
|
||||||
if postprocess:
|
|
||||||
cr+= "Assistant: "+post_process(subtask_reasoning)
|
|
||||||
else:
|
|
||||||
cr+= "Assistant: "+subtask_reasoning
|
|
||||||
subtask_response, function_results = process_functions(cr, subtask["function"],agent_actions=agent_actions)
|
|
||||||
subtask_result+=str(function_results)+"\n"
|
|
||||||
# if postprocess:
|
|
||||||
# subtask_result=post_process(subtask_result)
|
|
||||||
responses.append(subtask_response[-1])
|
|
||||||
if re_evaluate:
|
|
||||||
## Better output or this infinite loops..
|
|
||||||
logger.info("-> Re-evaluate if another action is needed")
|
|
||||||
## ? conversation history should go after the user_input maybe?
|
|
||||||
re_eval = ""
|
|
||||||
# This is probably not needed as already in the history:
|
|
||||||
#re_eval = user_input +"\n"
|
|
||||||
#re_eval += "Conversation history: \n"
|
|
||||||
if postprocess:
|
|
||||||
re_eval+= post_process(process_history(responses[1:])) +"\n"
|
|
||||||
else:
|
|
||||||
re_eval+= process_history(responses[1:]) +"\n"
|
|
||||||
responses = evaluate(re_eval, responses, re_evaluate,agent_actions=agent_actions,re_evaluation_in_progress=True)
|
|
||||||
|
|
||||||
if re_evaluation_in_progress:
|
|
||||||
conversation_history.extend(responses)
|
|
||||||
return conversation_history
|
|
||||||
|
|
||||||
# unwrap the list of responses
|
|
||||||
conversation_history.append(responses[-1])
|
|
||||||
|
|
||||||
#responses = converse(responses)
|
|
||||||
|
|
||||||
# TODO: this needs to be optimized
|
|
||||||
responses = analyze(responses[1:], suffix=f"Return an appropriate answer given the context above\n")
|
|
||||||
|
|
||||||
# add responses to conversation history by extending the list
|
|
||||||
conversation_history.append(
|
|
||||||
{
|
|
||||||
"role": "assistant",
|
|
||||||
"content": responses,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
# logger.info the latest response from the conversation history
|
|
||||||
logger.info(conversation_history[-1]["content"])
|
|
||||||
tts(conversation_history[-1]["content"])
|
|
||||||
else:
|
|
||||||
logger.info("==> no action needed")
|
|
||||||
|
|
||||||
if re_evaluation_in_progress:
|
|
||||||
logger.info("==> LocalAGI has completed the user request")
|
|
||||||
logger.info("==> LocalAGI will reply to the user")
|
|
||||||
return conversation_history
|
|
||||||
|
|
||||||
# get the response from the model
|
|
||||||
response = converse(conversation_history)
|
|
||||||
|
|
||||||
# add the response to the conversation history by extending the list
|
|
||||||
conversation_history.extend(response)
|
|
||||||
# logger.info the latest response from the conversation history
|
|
||||||
logger.info(conversation_history[-1]["content"])
|
|
||||||
tts(conversation_history[-1]["content"])
|
|
||||||
return conversation_history
|
|
||||||
|
|
||||||
|
|
||||||
### Agent action definitions
|
### Agent action definitions
|
||||||
agent_actions = {
|
agent_actions = {
|
||||||
"search_internet": {
|
"search_internet": {
|
||||||
@@ -849,89 +355,78 @@ agent_actions = {
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
PLAN_ACTION: {
|
|
||||||
"function": generate_plan,
|
|
||||||
"plannable": False,
|
|
||||||
"description": 'The assistant for solving complex tasks that involves calling more functions in sequence, replies with the action "'+PLAN_ACTION+'".',
|
|
||||||
"signature": {
|
|
||||||
"name": PLAN_ACTION,
|
|
||||||
"description": """Plan complex tasks.""",
|
|
||||||
"parameters": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"description": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "reasoning behind the planning"
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"required": ["description"]
|
|
||||||
}
|
|
||||||
},
|
|
||||||
},
|
|
||||||
REPLY_ACTION: {
|
|
||||||
"function": None,
|
|
||||||
"plannable": False,
|
|
||||||
"description": 'For replying to the user, the assistant replies with the action "'+REPLY_ACTION+'" and the reply to the user directly when there is nothing to do.',
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
conversation_history = []
|
if __name__ == "__main__":
|
||||||
|
conversation_history = []
|
||||||
|
|
||||||
# Set a system prompt if SYSTEM_PROMPT is set
|
# Create a LocalAGI instance
|
||||||
if SYSTEM_PROMPT != "":
|
logger.info("Creating LocalAGI instance")
|
||||||
|
localagi = LocalAGI(
|
||||||
|
agent_actions=agent_actions,
|
||||||
|
embeddings_model=EMBEDDINGS_MODEL,
|
||||||
|
embeddings_api_base=EMBEDDINGS_API_BASE,
|
||||||
|
llm_model=LLM_MODEL,
|
||||||
|
tts_model=VOICE_MODEL,
|
||||||
|
tts_api_base=TTS_API_BASE,
|
||||||
|
functions_model=FUNCTIONS_MODEL,
|
||||||
|
api_base=LOCALAI_API_BASE,
|
||||||
|
stablediffusion_api_base=IMAGE_API_BASE,
|
||||||
|
stablediffusion_model=STABLEDIFFUSION_MODEL,
|
||||||
|
force_action=args.force_action,
|
||||||
|
plan_message=args.plan_message,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Set a system prompt if SYSTEM_PROMPT is set
|
||||||
|
if SYSTEM_PROMPT != "":
|
||||||
conversation_history.append({
|
conversation_history.append({
|
||||||
"role": "system",
|
"role": "system",
|
||||||
"content": SYSTEM_PROMPT
|
"content": SYSTEM_PROMPT
|
||||||
})
|
})
|
||||||
|
|
||||||
logger.info("Welcome to LocalAGI")
|
logger.info("Welcome to LocalAGI")
|
||||||
|
|
||||||
# Skip avatar creation if --skip-avatar is set
|
# Skip avatar creation if --skip-avatar is set
|
||||||
if not args.skip_avatar:
|
if not args.skip_avatar:
|
||||||
logger.info("Creating avatar, please wait...")
|
logger.info("Creating avatar, please wait...")
|
||||||
display_avatar()
|
display_avatar(localagi)
|
||||||
|
|
||||||
actions = ""
|
actions = ""
|
||||||
for action in agent_actions:
|
for action in agent_actions:
|
||||||
actions+=" '"+action+"'"
|
actions+=" '"+action+"'"
|
||||||
logger.info("LocalAGI internally can do the following actions:{actions}", actions=actions)
|
logger.info("LocalAGI internally can do the following actions:{actions}", actions=actions)
|
||||||
|
|
||||||
if not args.prompt:
|
if not args.prompt:
|
||||||
logger.info(">>> Interactive mode <<<")
|
logger.info(">>> Interactive mode <<<")
|
||||||
else:
|
else:
|
||||||
logger.info(">>> Prompt mode <<<")
|
logger.info(">>> Prompt mode <<<")
|
||||||
logger.info(args.prompt)
|
logger.info(args.prompt)
|
||||||
|
|
||||||
processed_messages = 0
|
# IF in prompt mode just evaluate, otherwise loop
|
||||||
# IF in prompt mode just evaluate, otherwise loop
|
if args.prompt:
|
||||||
if args.prompt:
|
conversation_history=localagi.evaluate(
|
||||||
conversation_history=evaluate(
|
|
||||||
args.prompt,
|
args.prompt,
|
||||||
conversation_history,
|
conversation_history,
|
||||||
re_evaluate=args.re_evaluate,
|
re_evaluate=args.re_evaluate,
|
||||||
agent_actions=agent_actions,
|
|
||||||
# Enable to lower context usage but increases LLM calls
|
# Enable to lower context usage but increases LLM calls
|
||||||
postprocess=args.postprocess,
|
postprocess=args.postprocess,
|
||||||
subtaskContext=args.subtaskContext,
|
subtaskContext=args.subtaskContext,
|
||||||
processed_messages=processed_messages,
|
|
||||||
)
|
)
|
||||||
processed_messages+=1
|
localagi.tts_play(conversation_history[-1]["content"])
|
||||||
|
|
||||||
if not args.prompt or args.interactive:
|
if not args.prompt or args.interactive:
|
||||||
# TODO: process functions also considering the conversation history? conversation history + input
|
# TODO: process functions also considering the conversation history? conversation history + input
|
||||||
logger.info(">>> Ready! What can I do for you? ( try with: plan a roadtrip to San Francisco ) <<<")
|
logger.info(">>> Ready! What can I do for you? ( try with: plan a roadtrip to San Francisco ) <<<")
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
user_input = input(">>> ")
|
user_input = input(">>> ")
|
||||||
# we are going to use the args to change the evaluation behavior
|
# we are going to use the args to change the evaluation behavior
|
||||||
conversation_history=evaluate(
|
conversation_history=localagi.evaluate(
|
||||||
user_input,
|
user_input,
|
||||||
conversation_history,
|
conversation_history,
|
||||||
re_evaluate=args.re_evaluate,
|
re_evaluate=args.re_evaluate,
|
||||||
agent_actions=agent_actions,
|
|
||||||
# Enable to lower context usage but increases LLM calls
|
# Enable to lower context usage but increases LLM calls
|
||||||
postprocess=args.postprocess,
|
postprocess=args.postprocess,
|
||||||
subtaskContext=args.subtaskContext,
|
subtaskContext=args.subtaskContext,
|
||||||
processed_messages=processed_messages,
|
|
||||||
)
|
)
|
||||||
processed_messages+=1
|
localagi.tts_play(conversation_history[-1]["content"])
|
||||||
src/localagi/__init__.py (new file)
@@ -0,0 +1 @@
+from .localagi import *
src/localagi/localagi.py (new file)
@@ -0,0 +1,592 @@
|
import os
|
||||||
|
import openai
|
||||||
|
import requests
|
||||||
|
from loguru import logger
|
||||||
|
import json
|
||||||
|
|
||||||
|
DEFAULT_API_BASE = "http://api:8080"
|
||||||
|
VOICE_MODEL = "en-us-kathleen-low.onnx"
|
||||||
|
STABLEDIFFUSION_MODEL = "stablediffusion"
|
||||||
|
FUNCTIONS_MODEL = "functions"
|
||||||
|
EMBEDDINGS_MODEL = "all-MiniLM-L6-v2"
|
||||||
|
LLM_MODEL = "gpt-4"
|
||||||
|
|
||||||
|
# LocalAGI class
|
||||||
|
class LocalAGI:
|
||||||
|
# Constructor
|
||||||
|
def __init__(self,
|
||||||
|
plan_action="plan",
|
||||||
|
reply_action="reply",
|
||||||
|
force_action="",
|
||||||
|
agent_actions={},
|
||||||
|
plan_message="",
|
||||||
|
api_base=DEFAULT_API_BASE,
|
||||||
|
tts_api_base="",
|
||||||
|
stablediffusion_api_base="",
|
||||||
|
embeddings_api_base="",
|
||||||
|
tts_model=VOICE_MODEL,
|
||||||
|
stablediffusion_model=STABLEDIFFUSION_MODEL,
|
||||||
|
functions_model=FUNCTIONS_MODEL,
|
||||||
|
embeddings_model=EMBEDDINGS_MODEL,
|
||||||
|
llm_model=LLM_MODEL,
|
||||||
|
tts_player="aplay",
|
||||||
|
):
|
||||||
|
self.api_base = api_base
|
||||||
|
self.agent_actions = agent_actions
|
||||||
|
self.plan_message = plan_message
|
||||||
|
self.force_action = force_action
|
||||||
|
self.processed_messages=0
|
||||||
|
self.tts_player = tts_player
|
||||||
|
self.agent_actions[plan_action] = {
|
||||||
|
"function": self.generate_plan,
|
||||||
|
"plannable": False,
|
||||||
|
"description": 'The assistant for solving complex tasks that involves calling more functions in sequence, replies with the action "'+plan_action+'".',
|
||||||
|
"signature": {
|
||||||
|
"name": plan_action,
|
||||||
|
"description": """Plan complex tasks.""",
|
||||||
|
"parameters": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"description": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "reasoning behind the planning"
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"required": ["description"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
self.agent_actions[reply_action] = {
|
||||||
|
"function": None,
|
||||||
|
"plannable": False,
|
||||||
|
"description": 'For replying to the user, the assistant replies with the action "'+reply_action+'" and the reply to the user directly when there is nothing to do.',
|
||||||
|
}
|
||||||
|
self.tts_api_base = tts_api_base if tts_api_base else self.api_base
|
||||||
|
self.stablediffusion_api_base = stablediffusion_api_base if stablediffusion_api_base else self.api_base
|
||||||
|
self.embeddings_api_base = embeddings_api_base if embeddings_api_base else self.api_base
|
||||||
|
self.tts_model = tts_model
|
||||||
|
self.stablediffusion_model = stablediffusion_model
|
||||||
|
self.functions_model = functions_model
|
||||||
|
self.embeddings_model = embeddings_model
|
||||||
|
self.llm_model = llm_model
|
||||||
|
self.reply_action = reply_action
|
||||||
|
# Function to create images with LocalAI
|
||||||
|
def get_avatar(self, input_text):
|
||||||
|
response = openai.Image.create(
|
||||||
|
prompt=input_text,
|
||||||
|
n=1,
|
||||||
|
size="128x128",
|
||||||
|
api_base=self.sta+"/v1"
|
||||||
|
)
|
||||||
|
return response['data'][0]['url']
|
||||||
|
|
||||||
|
def tts_play(self, input_text):
|
||||||
|
output_file_path = '/tmp/output.wav'
|
||||||
|
self.tts(input_text, output_file_path)
|
||||||
|
try:
|
||||||
|
# Use aplay to play the audio
|
||||||
|
os.system(f"{self.tts_player} {output_file_path}")
|
||||||
|
# remove the audio file
|
||||||
|
os.remove(output_file_path)
|
||||||
|
except:
|
||||||
|
logger.info('Unable to play audio')
|
||||||
|
|
||||||
|
# Function to create audio with LocalAI
|
||||||
|
def tts(self, input_text, output_file_path):
|
||||||
|
# strip newlines from text
|
||||||
|
input_text = input_text.replace("\n", ".")
|
||||||
|
|
||||||
|
# get from OPENAI_API_BASE env var
|
||||||
|
url = self.tts_api_base + '/tts'
|
||||||
|
headers = {'Content-Type': 'application/json'}
|
||||||
|
data = {
|
||||||
|
"input": input_text,
|
||||||
|
"model": self.tts_model,
|
||||||
|
}
|
||||||
|
|
||||||
|
response = requests.post(url, headers=headers, data=json.dumps(data))
|
||||||
|
|
||||||
|
if response.status_code == 200:
|
||||||
|
with open(output_file_path, 'wb') as f:
|
||||||
|
f.write(response.content)
|
||||||
|
logger.info('Audio file saved successfully:', output_file_path)
|
||||||
|
else:
|
||||||
|
logger.info('Request failed with status code', response.status_code)
|
||||||
|
|
||||||
|
# Function to analyze the user input and pick the next action to do
|
||||||
|
def needs_to_do_action(self, user_input, agent_actions={}):
|
||||||
|
if len(agent_actions) == 0:
|
||||||
|
agent_actions = self.agent_actions
|
||||||
|
# Get the descriptions and the actions name (the keys)
|
||||||
|
descriptions=self.action_description("", agent_actions)
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
{"role": "user",
|
||||||
|
"content": f"""Transcript of AI assistant responding to user requests. Replies with the action to perform and the reasoning.
|
||||||
|
{descriptions}"""},
|
||||||
|
{"role": "user",
|
||||||
|
"content": f"""{user_input}
|
||||||
|
Function call: """
|
||||||
|
}
|
||||||
|
]
|
||||||
|
functions = [
|
||||||
|
{
|
||||||
|
"name": "intent",
|
||||||
|
"description": """Decide to do an action.""",
|
||||||
|
"parameters": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"confidence": {
|
||||||
|
"type": "number",
|
||||||
|
"description": "confidence of the action"
|
||||||
|
},
|
||||||
|
"reasoning": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "reasoning behind the intent"
|
||||||
|
},
|
||||||
|
# "observation": {
|
||||||
|
# "type": "string",
|
||||||
|
# "description": "reasoning behind the intent"
|
||||||
|
# },
|
||||||
|
"action": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": list(agent_actions.keys()),
|
||||||
|
"description": "user intent"
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"required": ["action"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
]
|
||||||
|
response = openai.ChatCompletion.create(
|
||||||
|
#model="gpt-3.5-turbo",
|
||||||
|
model=self.functions_model,
|
||||||
|
messages=messages,
|
||||||
|
request_timeout=1200,
|
||||||
|
functions=functions,
|
||||||
|
api_base=self.api_base+"/v1",
|
||||||
|
stop=None,
|
||||||
|
temperature=0.1,
|
||||||
|
#function_call="auto"
|
||||||
|
function_call={"name": "intent"},
|
||||||
|
)
|
||||||
|
response_message = response["choices"][0]["message"]
|
||||||
|
if response_message.get("function_call"):
|
||||||
|
function_name = response.choices[0].message["function_call"].name
|
||||||
|
function_parameters = response.choices[0].message["function_call"].arguments
|
||||||
|
# read the json from the string
|
||||||
|
res = json.loads(function_parameters)
|
||||||
|
logger.debug(">>> function name: "+function_name)
|
||||||
|
logger.debug(">>> function parameters: "+function_parameters)
|
||||||
|
return res
|
||||||
|
return {"action": self.reply_action}
|
||||||
|
|
||||||
|
# This is used to collect the descriptions of the agent actions, used to populate the LLM prompt
|
||||||
|
def action_description(self, action, agent_actions):
|
||||||
|
descriptions=""
|
||||||
|
# generate descriptions of actions that the agent can pick
|
||||||
|
for a in agent_actions:
|
||||||
|
if ( action != "" and action == a ) or (action == ""):
|
||||||
|
descriptions+=agent_actions[a]["description"]+"\n"
|
||||||
|
return descriptions
|
||||||
|
|
||||||
|
|
||||||
|
### This function is used to process the functions given a user input.
|
||||||
|
### It picks a function, executes it and returns the list of messages containing the result.
|
||||||
|
def process_functions(self, user_input, action="",):
|
||||||
|
|
||||||
|
descriptions=self.action_description(action, self.agent_actions)
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
# {"role": "system", "content": "You are a helpful assistant."},
|
||||||
|
{"role": "user",
|
||||||
|
"content": f"""Transcript of AI assistant responding to user requests. Replies with the action to perform, including reasoning, and the confidence interval from 0 to 100.
|
||||||
|
{descriptions}"""},
|
||||||
|
{"role": "user",
|
||||||
|
"content": f"""{user_input}
|
||||||
|
Function call: """
|
||||||
|
}
|
||||||
|
]
|
||||||
|
response = self.function_completion(messages, action=action)
|
||||||
|
response_message = response["choices"][0]["message"]
|
||||||
|
response_result = ""
|
||||||
|
function_result = {}
|
||||||
|
if response_message.get("function_call"):
|
||||||
|
function_name = response.choices[0].message["function_call"].name
|
||||||
|
function_parameters = response.choices[0].message["function_call"].arguments
|
||||||
|
logger.debug("==> function parameters: {function_parameters}",function_parameters=function_parameters)
|
||||||
|
function_to_call = self.agent_actions[function_name]["function"]
|
||||||
|
|
||||||
|
function_result = function_to_call(function_parameters, agent_actions=self.agent_actions, localagi=self)
|
||||||
|
logger.info("==> function result: {function_result}", function_result=function_result)
|
||||||
|
messages.append(
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": None,
|
||||||
|
"function_call": {"name": function_name, "arguments": function_parameters,},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
messages.append(
|
||||||
|
{
|
||||||
|
"role": "function",
|
||||||
|
"name": function_name,
|
||||||
|
"content": str(function_result)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return messages, function_result
|
||||||
|
|
||||||
|
### function_completion is used to autocomplete functions given a list of messages
|
||||||
|
def function_completion(self, messages, action=""):
|
||||||
|
function_call = "auto"
|
||||||
|
if action != "":
|
||||||
|
function_call={"name": action}
|
||||||
|
logger.debug("==> function name: {function_call}", function_call=function_call)
|
||||||
|
# get the functions from the signatures of the agent actions, if exists
|
||||||
|
functions = []
|
||||||
|
for action in self.agent_actions:
|
||||||
|
if self.agent_actions[action].get("signature"):
|
||||||
|
functions.append(self.agent_actions[action]["signature"])
|
||||||
|
response = openai.ChatCompletion.create(
|
||||||
|
#model="gpt-3.5-turbo",
|
||||||
|
model=self.functions_model,
|
||||||
|
messages=messages,
|
||||||
|
functions=functions,
|
||||||
|
request_timeout=1200,
|
||||||
|
stop=None,
|
||||||
|
api_base=self.api_base+"/v1",
|
||||||
|
temperature=0.1,
|
||||||
|
function_call=function_call
|
||||||
|
)
|
||||||
|
|
||||||
|
return response
|
||||||
|
|
||||||
|
# Rework the content of each message in the history in a way that is understandable by the LLM
|
||||||
|
# TODO: switch to templates (?)
|
||||||
|
def process_history(self, conversation_history):
|
||||||
|
messages = ""
|
||||||
|
for message in conversation_history:
|
||||||
|
# if there is content append it
|
||||||
|
if message.get("content") and message["role"] == "function":
|
||||||
|
messages+="Function result: \n" + message["content"]+"\n"
|
||||||
|
elif message.get("function_call"):
|
||||||
|
# encode message["function_call" to json and appends it
|
||||||
|
fcall = json.dumps(message["function_call"])
|
||||||
|
parameters = "calling " + message["function_call"]["name"]+" with arguments:"
|
||||||
|
args=json.loads(message["function_call"]["arguments"])
|
||||||
|
for arg in args:
|
||||||
|
logger.debug(arg)
|
||||||
|
logger.debug(args)
|
||||||
|
v=args[arg]
|
||||||
|
parameters+=f""" {arg}=\"{v}\""""
|
||||||
|
messages+= parameters+"\n"
|
||||||
|
elif message.get("content") and message["role"] == "user":
|
||||||
|
messages+=message["content"]+"\n"
|
||||||
|
elif message.get("content") and message["role"] == "assistant":
|
||||||
|
messages+="Assistant message: "+message["content"]+"\n"
|
||||||
|
return messages
|
||||||
|
|
||||||
|
def converse(self, responses):
|
||||||
|
response = openai.ChatCompletion.create(
|
||||||
|
model=self.llm_model,
|
||||||
|
messages=responses,
|
||||||
|
stop=None,
|
||||||
|
api_base=self.api_base+"/v1",
|
||||||
|
request_timeout=1200,
|
||||||
|
temperature=0.1,
|
||||||
|
)
|
||||||
|
responses.append(
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": response.choices[0].message["content"],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return responses
|
||||||
|
|
||||||
|
### Fine tune a string before feeding into the LLM
|
||||||
|
|
||||||
|
def analyze(self, responses, prefix="Analyze the following text highlighting the relevant information and identify a list of actions to take if there are any. If there are errors, suggest solutions to fix them", suffix=""):
|
||||||
|
string = self.process_history(responses)
|
||||||
|
messages = []
|
||||||
|
|
||||||
|
if prefix != "":
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": f"""{prefix}:
|
||||||
|
|
||||||
|
```
|
||||||
|
{string}
|
||||||
|
```
|
||||||
|
""",
|
||||||
|
}
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": f"""{string}""",
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
if suffix != "":
|
||||||
|
messages[0]["content"]+=f"""{suffix}"""
|
||||||
|
|
||||||
|
response = openai.ChatCompletion.create(
|
||||||
|
model=self.llm_model,
|
||||||
|
messages=messages,
|
||||||
|
stop=None,
|
||||||
|
api_base=self.api_base+"/v1",
|
||||||
|
request_timeout=1200,
|
||||||
|
temperature=0.1,
|
||||||
|
)
|
||||||
|
return response.choices[0].message["content"]
|
||||||
|
|
||||||
|
def post_process(self, string):
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": f"""Summarize the following text, keeping the relevant information:
|
||||||
|
|
||||||
|
```
|
||||||
|
{string}
|
||||||
|
```
|
||||||
|
""",
|
||||||
|
}
|
||||||
|
]
|
||||||
|
logger.info("==> Post processing: {string}", string=string)
|
||||||
|
# get the response from the model
|
||||||
|
response = openai.ChatCompletion.create(
|
||||||
|
model=self.llm_model,
|
||||||
|
messages=messages,
|
||||||
|
api_base=self.api_base+"/v1",
|
||||||
|
stop=None,
|
||||||
|
temperature=0.1,
|
||||||
|
request_timeout=1200,
|
||||||
|
)
|
||||||
|
result = response["choices"][0]["message"]["content"]
|
||||||
|
logger.info("==> Processed: {string}", string=result)
|
||||||
|
return result
|
||||||
|
|
||||||
|
def generate_plan(self, user_input, agent_actions={}, localagi=None):
|
||||||
|
res = json.loads(user_input)
|
||||||
|
logger.info("--> Calculating plan: {description}", description=res["description"])
|
||||||
|
descriptions=self.action_description("",agent_actions)
|
||||||
|
|
||||||
|
plan_message = "The assistant replies with a plan to answer the request with a list of subtasks with logical steps. The reasoning includes a self-contained, detailed and descriptive instruction to fullfill the task."
|
||||||
|
if self.plan_message:
|
||||||
|
plan_message = self.plan_message
|
||||||
|
# plan_message = "The assistant replies with a plan of 3 steps to answer the request with a list of subtasks with logical steps. The reasoning includes a self-contained, detailed and descriptive instruction to fullfill the task."
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
{"role": "user",
|
||||||
|
"content": f"""Transcript of AI assistant responding to user requests.
|
||||||
|
{descriptions}
|
||||||
|
|
||||||
|
Request: {plan_message}
|
||||||
|
Thought: {res["description"]}
|
||||||
|
Function call: """
|
||||||
|
}
|
||||||
|
]
|
||||||
|
# get list of plannable actions
|
||||||
|
plannable_actions = []
|
||||||
|
for action in agent_actions:
|
||||||
|
if agent_actions[action]["plannable"]:
|
||||||
|
# append the key of the dict to plannable_actions
|
||||||
|
plannable_actions.append(action)
|
||||||
|
|
||||||
|
functions = [
|
||||||
|
{
|
||||||
|
"name": "plan",
|
||||||
|
"description": """Decide to do an action.""",
|
||||||
|
"parameters": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"subtasks": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"reasoning": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "subtask list",
|
||||||
|
},
|
||||||
|
"function": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": plannable_actions,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"required": ["subtasks"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
]
|
||||||
|
response = openai.ChatCompletion.create(
|
||||||
|
#model="gpt-3.5-turbo",
|
||||||
|
model=self.functions_model,
|
||||||
|
messages=messages,
|
||||||
|
functions=functions,
|
||||||
|
api_base=self.api_base+"/v1",
|
||||||
|
stop=None,
|
||||||
|
temperature=0.1,
|
||||||
|
#function_call="auto"
|
||||||
|
function_call={"name": "plan"},
|
||||||
|
)
|
||||||
|
response_message = response["choices"][0]["message"]
|
||||||
|
if response_message.get("function_call"):
|
||||||
|
function_name = response.choices[0].message["function_call"].name
|
||||||
|
function_parameters = response.choices[0].message["function_call"].arguments
|
||||||
|
# read the json from the string
|
||||||
|
res = json.loads(function_parameters)
|
||||||
|
logger.debug("<<< function name: {function_name} >>>> parameters: {parameters}", function_name=function_name,parameters=function_parameters)
|
||||||
|
return res
|
||||||
|
return {"action": self.reply_action}
|
||||||
|
|
||||||
|
def evaluate(self,user_input, conversation_history = [],re_evaluate=False,re_evaluation_in_progress=False, postprocess=False, subtaskContext=False):
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": user_input,
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
conversation_history.extend(messages)
|
||||||
|
|
||||||
|
# pulling the old history make the context grow exponentially
|
||||||
|
# and most importantly it repeates the first message with the commands again and again.
|
||||||
|
# it needs a bit of cleanup and process the messages and piggyback more LocalAI functions templates
|
||||||
|
# old_history = process_history(conversation_history)
|
||||||
|
# action_picker_message = "Conversation history:\n"+old_history
|
||||||
|
# action_picker_message += "\n"
|
||||||
|
action_picker_message = "Request: "+user_input
|
||||||
|
|
||||||
|
picker_actions = self.agent_actions
|
||||||
|
if self.force_action:
|
||||||
|
aa = {}
|
||||||
|
aa[self.force_action] = self.agent_actions[self.force_action]
|
||||||
|
picker_actions = aa
|
||||||
|
logger.info("==> Forcing action to '{action}' as requested by the user", action=self.force_action)
|
||||||
|
|
||||||
|
#if re_evaluate and not re_evaluation_in_progress:
|
||||||
|
# observation = analyze(conversation_history, prefix=True)
|
||||||
|
# action_picker_message+="\n\Thought: "+observation[-1]["content"]
|
||||||
|
if re_evaluation_in_progress:
|
||||||
|
observation = self.analyze(conversation_history)
|
||||||
|
action_picker_message="Decide from the output below if we have to do another action:\n"
|
||||||
|
action_picker_message+="```\n"+user_input+"\n```"
|
||||||
|
action_picker_message+="\n\nObservation: "+observation
|
||||||
|
# if there is no action to do, we can just reply to the user with REPLY_ACTION
|
||||||
|
try:
|
||||||
|
action = self.needs_to_do_action(action_picker_message,agent_actions=picker_actions)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("==> error: ")
|
||||||
|
logger.error(e)
|
||||||
|
action = {"action": self.reply_action}
|
||||||
|
|
||||||
|
if action["action"] != self.reply_action:
|
||||||
|
logger.info("==> LocalAGI wants to call '{action}'", action=action["action"])
|
||||||
|
#logger.info("==> Observation '{reasoning}'", reasoning=action["observation"])
|
||||||
|
logger.info("==> Reasoning '{reasoning}'", reasoning=action["reasoning"])
|
||||||
|
# Force executing a plan instead
|
||||||
|
|
||||||
|
reasoning = action["reasoning"]
|
||||||
|
if action["action"] == self.reply_action:
|
||||||
|
logger.info("==> LocalAGI wants to create a plan that involves more actions ")
|
||||||
|
|
||||||
|
#if postprocess:
|
||||||
|
#reasoning = post_process(reasoning)
|
||||||
|
function_completion_message=""
|
||||||
|
if self.processed_messages > 0:
|
||||||
|
function_completion_message += self.process_history(conversation_history)+"\n"
|
||||||
|
function_completion_message += "Request: "+user_input+"\nReasoning: "+reasoning
|
||||||
|
responses, function_results = self.process_functions(function_completion_message, action=action["action"])
|
||||||
|
# if there are no subtasks, we can just reply,
|
||||||
|
# otherwise we execute the subtasks
|
||||||
|
# First we check if it's an object
|
||||||
|
if isinstance(function_results, dict) and function_results.get("subtasks") and len(function_results["subtasks"]) > 0:
|
||||||
|
# cycle subtasks and execute functions
|
||||||
|
subtask_result=""
|
||||||
|
for subtask in function_results["subtasks"]:
|
||||||
|
#ctr="Context: "+user_input+"\nThought: "+action["reasoning"]+ "\nRequest: "+subtask["reasoning"]
|
||||||
|
#cr="Request: "+user_input+"\n"
|
||||||
|
cr=""
|
||||||
|
if subtask_result != "" and subtaskContext:
|
||||||
|
# Include cumulative results of previous subtasks
|
||||||
|
# TODO: this grows context, maybe we should use a different approach or summarize
|
||||||
|
##if postprocess:
|
||||||
|
## cr+= "Subtask results: "+post_process(subtask_result)+"\n"
|
||||||
|
##else:
|
||||||
|
cr+="\n"+subtask_result+"\n"
|
||||||
|
subtask_reasoning = subtask["reasoning"]
|
||||||
|
cr+="Reasoning: "+action["reasoning"]+ "\n"
|
||||||
|
cr+="\nFunction to call:" +subtask["function"]+"\n"
|
||||||
|
logger.info("==> subtask '{subtask}' ({reasoning})", subtask=subtask["function"], reasoning=subtask_reasoning)
|
||||||
|
if postprocess:
|
||||||
|
cr+= "Assistant: "+self.post_process(subtask_reasoning)
|
||||||
|
else:
|
||||||
|
cr+= "Assistant: "+subtask_reasoning
|
||||||
|
subtask_response, function_results = self.process_functions(cr, subtask["function"])
|
||||||
|
subtask_result+=str(function_results)+"\n"
|
||||||
|
# if postprocess:
|
||||||
|
# subtask_result=post_process(subtask_result)
|
||||||
|
responses.append(subtask_response[-1])
|
||||||
|
if re_evaluate:
|
||||||
|
## Better output or this infinite loops..
|
||||||
|
logger.info("-> Re-evaluate if another action is needed")
|
||||||
|
## ? conversation history should go after the user_input maybe?
|
||||||
|
re_eval = ""
|
||||||
|
# This is probably not needed as already in the history:
|
||||||
|
#re_eval = user_input +"\n"
|
||||||
|
#re_eval += "Conversation history: \n"
|
||||||
|
if postprocess:
|
||||||
|
re_eval+= self.post_process(self.process_history(responses[1:])) +"\n"
|
||||||
|
else:
|
||||||
|
re_eval+= self.process_history(responses[1:]) +"\n"
|
||||||
|
responses = self.evaluate(re_eval,
|
||||||
|
responses,
|
||||||
|
re_evaluate,
|
||||||
|
re_evaluation_in_progress=True)
|
||||||
|
|
||||||
|
if re_evaluation_in_progress:
|
||||||
|
conversation_history.extend(responses)
|
||||||
|
return conversation_history
|
||||||
|
|
||||||
|
# unwrap the list of responses
|
||||||
|
conversation_history.append(responses[-1])
|
||||||
|
|
||||||
|
#responses = converse(responses)
|
||||||
|
|
||||||
|
# TODO: this needs to be optimized
|
||||||
|
responses = self.analyze(responses[1:], suffix=f"Return an appropriate answer given the context above\n")
|
||||||
|
|
||||||
|
# add responses to conversation history by extending the list
|
||||||
|
conversation_history.append(
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": responses,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
self.processed_messages+=1
|
||||||
|
# logger.info the latest response from the conversation history
|
||||||
|
logger.info(conversation_history[-1]["content"])
|
||||||
|
#self.tts(conversation_history[-1]["content"])
|
||||||
|
else:
|
||||||
|
logger.info("==> no action needed")
|
||||||
|
|
||||||
|
if re_evaluation_in_progress:
|
||||||
|
logger.info("==> LocalAGI has completed the user request")
|
||||||
|
logger.info("==> LocalAGI will reply to the user")
|
||||||
|
return conversation_history
|
||||||
|
|
||||||
|
# get the response from the model
|
||||||
|
response = self.converse(conversation_history)
|
||||||
|
self.processed_messages+=1
|
||||||
|
|
||||||
|
# add the response to the conversation history by extending the list
|
||||||
|
conversation_history.extend(response)
|
||||||
|
# logger.info the latest response from the conversation history
|
||||||
|
logger.info(conversation_history[-1]["content"])
|
||||||
|
#self.tts(conversation_history[-1]["content"])
|
||||||
|
return conversation_history
|
||||||