Add ingest command
This commit is contained in:
@@ -1,6 +1,13 @@
|
|||||||
import openai
|
import openai
|
||||||
#from langchain.embeddings import HuggingFaceEmbeddings
|
#from langchain.embeddings import HuggingFaceEmbeddings
|
||||||
from langchain.embeddings import LocalAIEmbeddings
|
from langchain.embeddings import LocalAIEmbeddings
|
||||||
|
|
||||||
|
from langchain.document_loaders import (
|
||||||
|
SitemapLoader,
|
||||||
|
# GitHubIssuesLoader,
|
||||||
|
# GitLoader,
|
||||||
|
)
|
||||||
|
|
||||||
import uuid
|
import uuid
|
||||||
import sys
|
import sys
|
||||||
from queue import Queue
|
from queue import Queue
|
||||||
@@ -30,16 +37,32 @@ FILE_NAME_FORMAT = '%Y_%m_%d_%H_%M_%S'
|
|||||||
|
|
||||||
EMBEDDINGS_MODEL = os.environ.get("EMBEDDINGS_MODEL", "all-MiniLM-L6-v2")
|
EMBEDDINGS_MODEL = os.environ.get("EMBEDDINGS_MODEL", "all-MiniLM-L6-v2")
|
||||||
EMBEDDINGS_API_BASE = os.environ.get("EMBEDDINGS_API_BASE", "http://api:8080")
|
EMBEDDINGS_API_BASE = os.environ.get("EMBEDDINGS_API_BASE", "http://api:8080")
|
||||||
PERSISTENT_DIR = os.environ.get("PERSISTENT_DIR", "/data/")
|
PERSISTENT_DIR = os.environ.get("PERSISTENT_DIR", "/tmp/data/")
|
||||||
|
DB_DIR = os.environ.get("DB_DIR", "/tmp/data/db")
|
||||||
|
|
||||||
embeddings = LocalAIEmbeddings(model=EMBEDDINGS_MODEL,openai_api_base=EMBEDDINGS_API_BASE)
|
embeddings = LocalAIEmbeddings(model=EMBEDDINGS_MODEL,openai_api_base=EMBEDDINGS_API_BASE)
|
||||||
chroma_client = Chroma(collection_name="memories", persist_directory="/data/db", embedding_function=embeddings)
|
|
||||||
|
|
||||||
loop = None
|
loop = None
|
||||||
channel = None
|
channel = None
|
||||||
def call(thing):
    """Run the coroutine ``thing`` on the module-level ``loop`` and wait for it.

    The coroutine is handed to ``asyncio.run_coroutine_threadsafe`` together
    with the global ``loop``; the calling thread then blocks on the returned
    future and gets back whatever the coroutine returned.
    """
    pending = asyncio.run_coroutine_threadsafe(thing, loop)
    return pending.result()
|
||||||
|
|
||||||
|
def ingest(a, agent_actions={}, localagi=None):
    """Load a sitemap, split its documents and store them in "memories".

    Args:
        a: JSON-encoded string. The decoded object must contain a "url"
            key, which is passed to ``SitemapLoader`` as ``web_path``.
        agent_actions: Not used by this function. The default is never
            mutated here.
        localagi: Not used by this function.

    Returns:
        The fixed confirmation string "Documents ingested".

    Raises:
        json.JSONDecodeError: If ``a`` is not valid JSON.
        KeyError: If the decoded payload has no "url" key.
    """
    q = json.loads(a)

    # Chunking parameters handed to the text splitter below.
    chunk_size = 500
    chunk_overlap = 50

    logger.info(">>> ingesting: ")
    logger.info(q)

    # Both helpers are constructed before anything is fetched, so a bad
    # payload or misconfiguration fails before the sitemap is loaded.
    sitemap_loader = SitemapLoader(web_path=q["url"])
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )

    texts = text_splitter.split_documents(sitemap_loader.load())

    # The store is created, persisted to DB_DIR and then dropped right away:
    # no reference to it is kept once this statement finishes.
    Chroma.from_documents(
        texts,
        embeddings,
        collection_name="memories",
        persist_directory=DB_DIR,
    ).persist()

    return "Documents ingested"
|
||||||
|
|
||||||
def create_image(a, agent_actions={}, localagi=None):
|
def create_image(a, agent_actions={}, localagi=None):
|
||||||
q = json.loads(a)
|
q = json.loads(a)
|
||||||
logger.info(">>> creating image: ")
|
logger.info(">>> creating image: ")
|
||||||
@@ -63,6 +86,8 @@ def download_image(url: str):
|
|||||||
full_path = f"{PERSISTENT_DIR}{file_name}"
|
full_path = f"{PERSISTENT_DIR}{file_name}"
|
||||||
urllib.request.urlretrieve(url, full_path)
|
urllib.request.urlretrieve(url, full_path)
|
||||||
return file_name
|
return file_name
|
||||||
|
|
||||||
|
|
||||||
### Agent capabilities
|
### Agent capabilities
|
||||||
### These functions are called by the agent to perform actions
|
### These functions are called by the agent to perform actions
|
||||||
###
|
###
|
||||||
@@ -70,17 +95,20 @@ def save(memory, agent_actions={}, localagi=None):
|
|||||||
q = json.loads(memory)
|
q = json.loads(memory)
|
||||||
logger.info(">>> saving to memories: ")
|
logger.info(">>> saving to memories: ")
|
||||||
logger.info(q["content"])
|
logger.info(q["content"])
|
||||||
|
chroma_client = Chroma(collection_name="memories",embedding_function=embeddings, persist_directory=DB_DIR)
|
||||||
chroma_client.add_texts([q["content"]],[{"id": str(uuid.uuid4())}])
|
chroma_client.add_texts([q["content"]],[{"id": str(uuid.uuid4())}])
|
||||||
chroma_client.persist()
|
chroma_client.persist()
|
||||||
|
chroma_client = None
|
||||||
return f"The object was saved permanently to memory."
|
return f"The object was saved permanently to memory."
|
||||||
|
|
||||||
def search_memory(query, agent_actions={}, localagi=None):
|
def search_memory(query, agent_actions={}, localagi=None):
|
||||||
q = json.loads(query)
|
q = json.loads(query)
|
||||||
|
chroma_client = Chroma(collection_name="memories",embedding_function=embeddings, persist_directory=DB_DIR)
|
||||||
docs = chroma_client.similarity_search(q["reasoning"])
|
docs = chroma_client.similarity_search(q["reasoning"])
|
||||||
text_res="Memories found in the database:\n"
|
text_res="Memories found in the database:\n"
|
||||||
for doc in docs:
|
for doc in docs:
|
||||||
text_res+="- "+doc.page_content+"\n"
|
text_res+="- "+doc.page_content+"\n"
|
||||||
|
chroma_client = None
|
||||||
#if args.postprocess:
|
#if args.postprocess:
|
||||||
# return post_process(text_res)
|
# return post_process(text_res)
|
||||||
#return text_res
|
#return text_res
|
||||||
@@ -178,12 +206,12 @@ def search_duckduckgo(a, agent_actions={}, localagi=None):
|
|||||||
|
|
||||||
### Agent action definitions
|
### Agent action definitions
|
||||||
agent_actions = {
|
agent_actions = {
|
||||||
"create_image": {
|
"generate_picture": {
|
||||||
"function": create_image,
|
"function": create_image,
|
||||||
"plannable": True,
|
"plannable": True,
|
||||||
"description": 'If the user wants to generate an image, the assistant replies with "create_image", a detailed caption, the width and height of the image to generate.',
|
"description": 'For creating a picture, the assistant replies with "generate_picture" and a detailed caption, enhancing it with as much detail as possible.',
|
||||||
"signature": {
|
"signature": {
|
||||||
"name": "create_image",
|
"name": "generate_picture",
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
@@ -240,6 +268,25 @@ agent_actions = {
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
"ingest": {
|
||||||
|
"function": ingest,
|
||||||
|
"plannable": True,
|
||||||
|
"description": 'The assistant replies with the action "ingest" when there is an url to a sitemap to ingest memories from.',
|
||||||
|
"signature": {
|
||||||
|
"name": "ingest",
|
||||||
|
"description": """Save or store informations into memory.""",
|
||||||
|
"parameters": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"url": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "information to save"
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"required": ["url"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
},
|
||||||
"save_memory": {
|
"save_memory": {
|
||||||
"function": save,
|
"function": save,
|
||||||
"plannable": True,
|
"plannable": True,
|
||||||
|
|||||||
Reference in New Issue
Block a user