diff --git a/examples/discord/agent.py b/examples/discord/agent.py
index 1a248fb..1fb32da 100644
--- a/examples/discord/agent.py
+++ b/examples/discord/agent.py
@@ -1,6 +1,13 @@
 import openai
 #from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.embeddings import LocalAIEmbeddings
+
+from langchain.document_loaders import (
+    SitemapLoader,
+    # GitHubIssuesLoader,
+    # GitLoader,
+)
+
 import uuid
 import sys
 from queue import Queue
@@ -30,16 +37,32 @@ FILE_NAME_FORMAT = '%Y_%m_%d_%H_%M_%S'
 EMBEDDINGS_MODEL = os.environ.get("EMBEDDINGS_MODEL", "all-MiniLM-L6-v2")
 EMBEDDINGS_API_BASE = os.environ.get("EMBEDDINGS_API_BASE", "http://api:8080")
-PERSISTENT_DIR = os.environ.get("PERSISTENT_DIR", "/data/")
+PERSISTENT_DIR = os.environ.get("PERSISTENT_DIR", "/tmp/data/")
+DB_DIR = os.environ.get("DB_DIR", "/tmp/data/db")
 
 embeddings = LocalAIEmbeddings(model=EMBEDDINGS_MODEL,openai_api_base=EMBEDDINGS_API_BASE)
-chroma_client = Chroma(collection_name="memories", persist_directory="/data/db", embedding_function=embeddings)
 
 loop = None
 channel = None
 
 def call(thing):
     return asyncio.run_coroutine_threadsafe(thing,loop).result()
 
+def ingest(a, agent_actions={}, localagi=None):
+    q = json.loads(a)
+    chunk_size = 500
+    chunk_overlap = 50
+    logger.info(">>> ingesting: ")
+    logger.info(q)
+    documents = []
+    sitemap_loader = SitemapLoader(web_path=q["url"])
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+    documents.extend(sitemap_loader.load())
+    texts = text_splitter.split_documents(documents)
+    db = Chroma.from_documents(texts,embeddings,collection_name="memories", persist_directory=DB_DIR)
+    db.persist()
+    db = None
+    return f"Documents ingested"
+
 def create_image(a, agent_actions={}, localagi=None):
     q = json.loads(a)
     logger.info(">>> creating image: ")
@@ -63,6 +86,8 @@ def download_image(url: str):
     full_path = f"{PERSISTENT_DIR}{file_name}"
     urllib.request.urlretrieve(url, full_path)
     return file_name
+
+
 ### Agent capabilities
 ### These functions are called by the agent to perform actions
 ###
@@ -70,17 +95,20 @@ def save(memory, agent_actions={}, localagi=None):
     q = json.loads(memory)
     logger.info(">>> saving to memories: ")
     logger.info(q["content"])
+    chroma_client = Chroma(collection_name="memories",embedding_function=embeddings, persist_directory=DB_DIR)
     chroma_client.add_texts([q["content"]],[{"id": str(uuid.uuid4())}])
     chroma_client.persist()
+    chroma_client = None
     return f"The object was saved permanently to memory."
 
 def search_memory(query, agent_actions={}, localagi=None):
     q = json.loads(query)
+    chroma_client = Chroma(collection_name="memories",embedding_function=embeddings, persist_directory=DB_DIR)
     docs = chroma_client.similarity_search(q["reasoning"])
     text_res="Memories found in the database:\n"
     for doc in docs:
         text_res+="- "+doc.page_content+"\n"
-
+    chroma_client = None
     #if args.postprocess:
     #    return post_process(text_res)
     #return text_res
@@ -178,12 +206,12 @@ def search_duckduckgo(a, agent_actions={}, localagi=None):
 
 ### Agent action definitions
 agent_actions = {
-    "create_image": {
+    "generate_picture": {
         "function": create_image,
         "plannable": True,
-        "description": 'If the user wants to generate an image, the assistant replies with "create_image", a detailed caption, the width and height of the image to generate.',
+        "description": 'For creating a picture, the assistant replies with "generate_picture" and a detailed caption, enhancing it with as much detail as possible.',
        "signature": {
-            "name": "create_image",
+            "name": "generate_picture",
             "parameters": {
                 "type": "object",
                 "properties": {
@@ -240,6 +268,25 @@ agent_actions = {
             }
         },
     },
+    "ingest": {
+        "function": ingest,
+        "plannable": True,
+        "description": 'The assistant replies with the action "ingest" when there is a URL to a sitemap to ingest memories from.',
+        "signature": {
+            "name": "ingest",
+            "description": """Ingest a sitemap URL and store its pages into memory.""",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "url": {
+                        "type": "string",
+                        "description": "url of the sitemap to ingest"
+                    },
+                },
+                "required": ["url"]
+            }
+        },
+    },
     "save_memory": {
         "function": save,
         "plannable": True,
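For reference, the ingestion path added above reduces to the round trip sketched below. This is a minimal standalone sketch, not code from the PR: the sitemap URL is a placeholder, and it assumes the LocalAI embeddings endpoint configured via EMBEDDINGS_API_BASE is reachable and that langchain, chromadb, and the sitemap loader's dependencies are installed.

# Hypothetical smoke test mirroring the new ingest() / search_memory() flow,
# without importing the Discord agent module itself.
import os

from langchain.document_loaders import SitemapLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import LocalAIEmbeddings
from langchain.vectorstores import Chroma

EMBEDDINGS_MODEL = os.environ.get("EMBEDDINGS_MODEL", "all-MiniLM-L6-v2")
EMBEDDINGS_API_BASE = os.environ.get("EMBEDDINGS_API_BASE", "http://api:8080")
DB_DIR = os.environ.get("DB_DIR", "/tmp/data/db")

embeddings = LocalAIEmbeddings(model=EMBEDDINGS_MODEL, openai_api_base=EMBEDDINGS_API_BASE)

# Load every page referenced by the sitemap and split it into overlapping chunks,
# using the same chunk_size/chunk_overlap as ingest(). The URL is a placeholder.
documents = SitemapLoader(web_path="https://example.com/sitemap.xml").load()
texts = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(documents)

# Persist the chunks into the shared "memories" collection, then drop the handle,
# matching the open-use-persist-discard pattern this diff moves to.
db = Chroma.from_documents(texts, embeddings, collection_name="memories", persist_directory=DB_DIR)
db.persist()
db = None

# A later search_memory() call re-opens the collection and runs a similarity search.
db = Chroma(collection_name="memories", embedding_function=embeddings, persist_directory=DB_DIR)
for doc in db.similarity_search("what does example.com document?"):
    print("-", doc.page_content[:80])

Opening the Chroma client per call, in place of the removed module-level chroma_client, appears intended to let each action see documents persisted by the others; the sketch follows that same pattern.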