Add ingest command

This commit is contained in:
mudler
2023-08-23 21:55:57 +02:00
parent a823131a2d
commit 42d35dd7a3

View File

@@ -1,6 +1,13 @@
import openai
#from langchain.embeddings import HuggingFaceEmbeddings
from langchain.embeddings import LocalAIEmbeddings
from langchain.document_loaders import (
SitemapLoader,
# GitHubIssuesLoader,
# GitLoader,
)
import uuid
import sys
from queue import Queue
@@ -30,16 +37,32 @@ FILE_NAME_FORMAT = '%Y_%m_%d_%H_%M_%S'
EMBEDDINGS_MODEL = os.environ.get("EMBEDDINGS_MODEL", "all-MiniLM-L6-v2")
EMBEDDINGS_API_BASE = os.environ.get("EMBEDDINGS_API_BASE", "http://api:8080")
PERSISTENT_DIR = os.environ.get("PERSISTENT_DIR", "/data/")
PERSISTENT_DIR = os.environ.get("PERSISTENT_DIR", "/tmp/data/")
DB_DIR = os.environ.get("DB_DIR", "/tmp/data/db")
embeddings = LocalAIEmbeddings(model=EMBEDDINGS_MODEL,openai_api_base=EMBEDDINGS_API_BASE)
chroma_client = Chroma(collection_name="memories", persist_directory="/data/db", embedding_function=embeddings)
loop = None
channel = None
def call(thing):
    """Schedule coroutine *thing* on the shared module-level event loop and
    block the calling thread until it completes, returning its result."""
    future = asyncio.run_coroutine_threadsafe(thing, loop)
    return future.result()
def ingest(a, agent_actions={}, localagi=None):
    """Ingest documents from a sitemap URL into the persistent "memories" store.

    Args:
        a: JSON string with at least a "url" key pointing to a sitemap.
           Optional "chunk_size" / "chunk_overlap" keys override the
           text-splitting defaults (500 / 50) — backward compatible.
        agent_actions: unused; kept for the agent-action calling convention.
        localagi: unused; kept for the agent-action calling convention.

    Returns:
        A short status string for the agent.
    """
    q = json.loads(a)
    # Previously hard-coded; now overridable per-request while keeping
    # the old defaults.
    chunk_size = int(q.get("chunk_size", 500))
    chunk_overlap = int(q.get("chunk_overlap", 50))
    logger.info(">>> ingesting: ")
    logger.info(q)
    # Load every page referenced by the sitemap (network I/O), then split
    # the pages into overlapping chunks for embedding.
    sitemap_loader = SitemapLoader(web_path=q["url"])
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    documents = sitemap_loader.load()
    texts = text_splitter.split_documents(documents)
    # Embed and persist the chunks into the shared "memories" collection.
    db = Chroma.from_documents(texts, embeddings, collection_name="memories", persist_directory=DB_DIR)
    db.persist()
    db = None  # drop the handle so the Chroma client is released
    return "Documents ingested"
def create_image(a, agent_actions={}, localagi=None):
q = json.loads(a)
logger.info(">>> creating image: ")
@@ -63,6 +86,8 @@ def download_image(url: str):
full_path = f"{PERSISTENT_DIR}{file_name}"
urllib.request.urlretrieve(url, full_path)
return file_name
### Agent capabilities
### These functions are called by the agent to perform actions
###
@@ -70,17 +95,20 @@ def save(memory, agent_actions={}, localagi=None):
q = json.loads(memory)
logger.info(">>> saving to memories: ")
logger.info(q["content"])
chroma_client = Chroma(collection_name="memories",embedding_function=embeddings, persist_directory=DB_DIR)
chroma_client.add_texts([q["content"]],[{"id": str(uuid.uuid4())}])
chroma_client.persist()
chroma_client = None
return f"The object was saved permanently to memory."
def search_memory(query, agent_actions={}, localagi=None):
q = json.loads(query)
chroma_client = Chroma(collection_name="memories",embedding_function=embeddings, persist_directory=DB_DIR)
docs = chroma_client.similarity_search(q["reasoning"])
text_res="Memories found in the database:\n"
for doc in docs:
text_res+="- "+doc.page_content+"\n"
chroma_client = None
#if args.postprocess:
# return post_process(text_res)
#return text_res
@@ -178,12 +206,12 @@ def search_duckduckgo(a, agent_actions={}, localagi=None):
### Agent action definitions
agent_actions = {
"create_image": {
"generate_picture": {
"function": create_image,
"plannable": True,
"description": 'If the user wants to generate an image, the assistant replies with "create_image", a detailed caption, the width and height of the image to generate.',
"description": 'For creating a picture, the assistant replies with "generate_picture" and a detailed caption, enhancing it with as much detail as possible.',
"signature": {
"name": "create_image",
"name": "generate_picture",
"parameters": {
"type": "object",
"properties": {
@@ -240,6 +268,25 @@ agent_actions = {
}
},
},
"ingest": {
"function": ingest,
"plannable": True,
"description": 'The assistant replies with the action "ingest" when there is an url to a sitemap to ingest memories from.',
"signature": {
"name": "ingest",
"description": """Save or store informations into memory.""",
"parameters": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "information to save"
},
},
"required": ["url"]
}
},
},
"save_memory": {
"function": save,
"plannable": True,