Add slack example

mudler
2023-12-16 18:54:06 +01:00
parent 0f953d8ad9
commit 7e3f2cbffb
19 changed files with 1854 additions and 1 deletion

examples/slack/app/agent.py Normal file

@@ -0,0 +1,396 @@
import openai
#from langchain.embeddings import HuggingFaceEmbeddings
from langchain.embeddings import LocalAIEmbeddings
from langchain.document_loaders import (
    SitemapLoader,
    # GitHubIssuesLoader,
    # GitLoader,
)
import uuid
import sys
from app.env import *
from queue import Queue
import asyncio
import threading
from localagi import LocalAGI
from ascii_magic import AsciiArt
from duckduckgo_search import DDGS
from typing import Dict, List
import os
from langchain.text_splitter import RecursiveCharacterTextSplitter
import urllib.request
from datetime import datetime
import json
from io import StringIO
FILE_NAME_FORMAT = '%Y_%m_%d_%H_%M_%S'
if not os.environ.get("PYSQL_HACK", "false") == "false":
# these three lines swap the stdlib sqlite3 lib with the pysqlite3 package for chroma
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
if MILVUS_HOST == "":
from langchain.vectorstores import Chroma
else:
from langchain.vectorstores import Milvus
embeddings = LocalAIEmbeddings(model=EMBEDDINGS_MODEL,openai_api_base=EMBEDDINGS_API_BASE)
loop = None
channel = None
def call(thing):
return asyncio.run_coroutine_threadsafe(thing,loop).result()
def ingest(a, agent_actions={}, localagi=None):
q = json.loads(a)
chunk_size = MEMORY_CHUNK_SIZE
chunk_overlap = MEMORY_CHUNK_OVERLAP
print(">>> ingesting: ")
print(q)
documents = []
sitemap_loader = SitemapLoader(web_path=q["url"])
text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
documents.extend(sitemap_loader.load())
texts = text_splitter.split_documents(documents)
if MILVUS_HOST == "":
db = Chroma.from_documents(texts,embeddings,collection_name=MEMORY_COLLECTION, persist_directory=PERSISTENT_DIR)
db.persist()
db = None
else:
Milvus.from_documents(texts,embeddings,collection_name=MEMORY_COLLECTION, connection_args={"host": MILVUS_HOST, "port": MILVUS_PORT})
return f"Documents ingested"
# def create_image(a, agent_actions={}, localagi=None):
# """
# Create an image based on a description using OpenAI's API.
# Args:
# a (str): A JSON string containing the description, width, and height for the image to be created.
# agent_actions (dict, optional): A dictionary of agent actions. Defaults to {}.
# localagi (LocalAGI, optional): An instance of the LocalAGI class. Defaults to None.
# Returns:
# str: A string containing the URL of the created image.
# """
# q = json.loads(a)
# print(">>> creating image: ")
# print(q["description"])
# size=f"{q['width']}x{q['height']}"
# response = openai.Image.create(prompt=q["description"], n=1, size=size)
# image_url = response["data"][0]["url"]
# image_name = download_image(image_url)
# image_path = f"{PERSISTENT_DIR}{image_name}"
# file = discord.File(image_path, filename=image_name)
# embed = discord.Embed(title="Generated image")
# embed.set_image(url=f"attachment://{image_name}")
# call(channel.send(file=file, content=f"Here is what I have generated", embed=embed))
# return f"Image created: {response['data'][0]['url']}"
def download_image(url: str):
file_name = f"{datetime.now().strftime(FILE_NAME_FORMAT)}.jpg"
full_path = f"{PERSISTENT_DIR}{file_name}"
urllib.request.urlretrieve(url, full_path)
return file_name
### Agent capabilities
### These functions are called by the agent to perform actions
###
def save(memory, agent_actions={}, localagi=None):
q = json.loads(memory)
print(">>> saving to memories: ")
print(q["content"])
if MILVUS_HOST == "":
chroma_client = Chroma(collection_name=MEMORY_COLLECTION,embedding_function=embeddings, persist_directory=PERSISTENT_DIR)
else:
chroma_client = Milvus(collection_name=MEMORY_COLLECTION,embedding_function=embeddings, connection_args={"host": MILVUS_HOST, "port": MILVUS_PORT})
chroma_client.add_texts([q["content"]],[{"id": str(uuid.uuid4())}])
if MILVUS_HOST == "":
chroma_client.persist()
chroma_client = None
return f"The object was saved permanently to memory."
def search_memory(query, agent_actions={}, localagi=None):
q = json.loads(query)
if MILVUS_HOST == "":
chroma_client = Chroma(collection_name=MEMORY_COLLECTION,embedding_function=embeddings, persist_directory=PERSISTENT_DIR)
else:
chroma_client = Milvus(collection_name=MEMORY_COLLECTION,embedding_function=embeddings, connection_args={"host": MILVUS_HOST, "port": MILVUS_PORT})
#docs = chroma_client.search(q["keywords"], "mmr")
retriever = chroma_client.as_retriever(search_type=MEMORY_SEARCH_TYPE, search_kwargs={"k": MEMORY_RESULTS})
docs = retriever.get_relevant_documents(q["keywords"])
text_res="Memories found in the database:\n"
sources = set() # To store unique sources
# Collect unique sources
for document in docs:
if "source" in document.metadata:
sources.add(document.metadata["source"])
for doc in docs:
# drop newlines from page_content
content = doc.page_content.replace("\n", " ")
content = " ".join(content.split())
text_res+="- "+content+"\n"
# Print the relevant sources used for the answer
for source in sources:
if source.startswith("http"):
text_res += "" + source + "\n"
chroma_client = None
#if args.postprocess:
# return post_process(text_res)
return text_res
#return localagi.post_process(text_res)
# write file to disk with content
def save_file(arg, agent_actions={}, localagi=None):
    arg = json.loads(arg)
    filename = arg["filename"]
    content = arg["content"]
    # create the persistent dir if it does not exist
    if not os.path.exists(PERSISTENT_DIR):
        os.makedirs(PERSISTENT_DIR)
    # write the file in the directory specified
    file = os.path.join(PERSISTENT_DIR, filename)
    # append if the file already exists, otherwise create it
    mode = 'a' if os.path.exists(file) else 'w'
    with open(file, mode) as f:
        f.write(content)
    return f"File {file} saved successfully."
def ddg(query: str, num_results: int, backend: str = "api") -> List[Dict[str, str]]:
"""Run query through DuckDuckGo and return metadata.
Args:
query: The query to search for.
num_results: The number of results to return.
Returns:
A list of dictionaries with the following keys:
snippet - The description of the result.
title - The title of the result.
link - The link to the result.
"""
ddgs = DDGS()
try:
results = ddgs.text(
query,
backend=backend,
)
if results is None:
return [{"Result": "No good DuckDuckGo Search Result was found"}]
def to_metadata(result: Dict) -> Dict[str, str]:
if backend == "news":
return {
"date": result["date"],
"title": result["title"],
"snippet": result["body"],
"source": result["source"],
"link": result["url"],
}
return {
"snippet": result["body"],
"title": result["title"],
"link": result["href"],
}
formatted_results = []
for i, res in enumerate(results, 1):
if res is not None:
formatted_results.append(to_metadata(res))
if len(formatted_results) == num_results:
break
except Exception as e:
print(e)
return []
return formatted_results
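# Illustrative: ddg("LocalAI", 2) returns at most two dicts shaped like
# {"snippet": ..., "title": ..., "link": ...} (plus "date" and "source" when
# backend="news").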
## Search on duckduckgo
def search_duckduckgo(a, agent_actions={}, localagi=None):
a = json.loads(a)
    results = ddg(a["query"], 2)
    text_res = ""
    for doc in results:
        text_res += f"""{doc["link"]}: {doc["title"]} {doc["snippet"]}\n"""
print("Found")
print(text_res)
#if args.postprocess:
# return post_process(text_res)
return text_res
#l = json.dumps(list)
#return l
### End Agent capabilities
###
### Agent action definitions
agent_actions = {
# "generate_picture": {
# "function": create_image,
# "plannable": True,
# "description": 'For creating a picture, the assistant replies with "generate_picture" and a detailed description, enhancing it with as much detail as possible.',
# "signature": {
# "name": "generate_picture",
# "parameters": {
# "type": "object",
# "properties": {
# "description": {
# "type": "string",
# },
# "width": {
# "type": "number",
# },
# "height": {
# "type": "number",
# },
# },
# }
# },
# },
"search_internet": {
"function": search_duckduckgo,
"plannable": True,
"description": 'For searching the internet with a query, the assistant replies with the action "search_internet" and the query to search.',
"signature": {
"name": "search_internet",
"description": """For searching internet.""",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "information to save"
},
},
}
},
},
"save_file": {
"function": save_file,
"plannable": True,
"description": 'The assistant replies with the action "save_file", the filename and content to save for writing a file to disk permanently. This can be used to store the result of complex actions locally.',
"signature": {
"name": "save_file",
"description": """For saving a file to disk with content.""",
"parameters": {
"type": "object",
"properties": {
"filename": {
"type": "string",
"description": "information to save"
},
"content": {
"type": "string",
"description": "information to save"
},
},
}
},
},
"ingest": {
"function": ingest,
"plannable": True,
"description": 'The assistant replies with the action "ingest" when there is an url to a sitemap to ingest memories from.',
"signature": {
"name": "ingest",
"description": """Save or store informations into memory.""",
"parameters": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "information to save"
},
},
"required": ["url"]
}
},
},
"save_memory": {
"function": save,
"plannable": True,
"description": 'The assistant replies with the action "save_memory" and the string to remember or store an information that thinks it is relevant permanently.',
"signature": {
"name": "save_memory",
"description": """Save or store informations into memory.""",
"parameters": {
"type": "object",
"properties": {
"content": {
"type": "string",
"description": "information to save"
},
},
"required": ["content"]
}
},
},
"search_memory": {
"function": search_memory,
"plannable": True,
"description": 'The assistant replies with the action "search_memory" for searching between its memories with a query term.',
"signature": {
"name": "search_memory",
"description": """Search in memory""",
"parameters": {
"type": "object",
"properties": {
"keywords": {
"type": "string",
"description": "reasoning behind the intent"
},
},
"required": ["keywords"]
}
},
},
}
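# Illustrative (not executed): LocalAGI invokes each "function" above with its
# arguments serialized as a JSON string matching the declared signature, e.g.:
#   search_duckduckgo(json.dumps({"query": "LocalAI"}))
#   save(json.dumps({"content": "a fact worth remembering"}))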
def localagi(q):
localagi = LocalAGI(
agent_actions=agent_actions,
llm_model=LLM_MODEL,
tts_model=VOICE_MODEL,
tts_api_base=TTS_API_BASE,
functions_model=FUNCTIONS_MODEL,
api_base=LOCALAI_API_BASE,
stablediffusion_api_base=IMAGE_API_BASE,
stablediffusion_model=STABLEDIFFUSION_MODEL,
)
conversation_history = []
conversation_history=localagi.evaluate(
q,
conversation_history,
critic=False,
re_evaluate=False,
# Enable to lower context usage but increases LLM calls
postprocess=False,
subtaskContext=True,
)
return conversation_history[-1]["content"]
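# Illustrative top-level usage (assumes a reachable LocalAI endpoint as
# configured in app/env.py):
#   answer = localagi("What is LocalAI?")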

examples/slack/app/bolt_listeners.py Normal file

@@ -0,0 +1,403 @@
import logging
import re
import time
from openai.error import Timeout
from slack_bolt import App, Ack, BoltContext, BoltResponse
from slack_bolt.request.payload_utils import is_event
from slack_sdk.web import WebClient
from app.env import (
OPENAI_TIMEOUT_SECONDS,
SYSTEM_TEXT,
TRANSLATE_MARKDOWN,
)
from app.i18n import translate
from app.openai_ops import (
ask_llm,
format_openai_message_content,
build_system_text,
)
from app.slack_ops import find_parent_message, is_no_mention_thread, post_wip_message, update_wip_message
#
# Listener functions
#
def just_ack(ack: Ack):
ack()
TIMEOUT_ERROR_MESSAGE = (
f":warning: Sorry! It looks like OpenAI didn't respond within {OPENAI_TIMEOUT_SECONDS} seconds. "
"Please try again later. :bow:"
)
DEFAULT_LOADING_TEXT = ":hourglass_flowing_sand: Wait a second, please ..."
def respond_to_app_mention(
context: BoltContext,
payload: dict,
client: WebClient,
logger: logging.Logger,
):
if payload.get("thread_ts") is not None:
parent_message = find_parent_message(
client, context.channel_id, payload.get("thread_ts")
)
if parent_message is not None:
if is_no_mention_thread(context, parent_message):
# The message event handler will reply to this
return
wip_reply = None
# Replace placeholder for Slack user ID in the system prompt
system_text = build_system_text(SYSTEM_TEXT, TRANSLATE_MARKDOWN, context)
messages = [{"role": "system", "content": system_text}]
print("system text:"+system_text, flush=True)
openai_api_key = context.get("OPENAI_API_KEY")
try:
if openai_api_key is None:
client.chat_postMessage(
channel=context.channel_id,
text="To use this app, please configure your OpenAI API key first",
)
return
user_id = context.actor_user_id or context.user_id
content = ""
if payload.get("thread_ts") is not None:
# Mentioning the bot user in a thread
replies_in_thread = client.conversations_replies(
channel=context.channel_id,
ts=payload.get("thread_ts"),
include_all_metadata=True,
limit=1000,
).get("messages", [])
            # Only the latest reply in the thread is considered
            reply = replies_in_thread[-1]
role = "assistant" if reply["user"] == context.bot_user_id else "user"
messages.append(
{
"role": role,
"content": (
format_openai_message_content(
reply["text"], TRANSLATE_MARKDOWN
)
),
}
)
else:
# Strip bot Slack user ID from initial message
msg_text = re.sub(f"<@{context.bot_user_id}>\\s*", "", payload["text"])
messages.append(
{
"role": "user",
"content": format_openai_message_content(msg_text, TRANSLATE_MARKDOWN),
}
)
loading_text = translate(
openai_api_key=openai_api_key, context=context, text=DEFAULT_LOADING_TEXT
)
wip_reply = post_wip_message(
client=client,
channel=context.channel_id,
thread_ts=payload["ts"],
loading_text=loading_text,
messages=messages,
user=context.user_id,
)
resp = ask_llm(messages=messages)
print("Reply "+resp)
update_wip_message(
client=client,
channel=context.channel_id,
ts=wip_reply["message"]["ts"],
text=resp,
messages=messages,
user=user_id,
)
except Timeout:
if wip_reply is not None:
text = (
(
wip_reply.get("message", {}).get("text", "")
if wip_reply is not None
else ""
)
+ "\n\n"
+ translate(
openai_api_key=openai_api_key,
context=context,
text=TIMEOUT_ERROR_MESSAGE,
)
)
client.chat_update(
channel=context.channel_id,
ts=wip_reply["message"]["ts"],
text=text,
)
except Exception as e:
text = (
(
wip_reply.get("message", {}).get("text", "")
if wip_reply is not None
else ""
)
+ "\n\n"
+ translate(
openai_api_key=openai_api_key,
context=context,
text=f":warning: Failed to start a conversation with ChatGPT: {e}",
)
)
        logger.exception(text)
if wip_reply is not None:
client.chat_update(
channel=context.channel_id,
ts=wip_reply["message"]["ts"],
text=text,
)
def respond_to_new_message(
context: BoltContext,
payload: dict,
client: WebClient,
logger: logging.Logger,
):
if payload.get("bot_id") is not None and payload.get("bot_id") != context.bot_id:
# Skip a new message by a different app
return
wip_reply = None
try:
is_in_dm_with_bot = payload.get("channel_type") == "im"
is_no_mention_required = False
thread_ts = payload.get("thread_ts")
if is_in_dm_with_bot is False and thread_ts is None:
return
openai_api_key = context.get("OPENAI_API_KEY")
if openai_api_key is None:
return
messages_in_context = []
if is_in_dm_with_bot is True and thread_ts is None:
# In the DM with the bot
past_messages = client.conversations_history(
channel=context.channel_id,
include_all_metadata=True,
limit=100,
).get("messages", [])
past_messages.reverse()
# Remove old messages
for message in past_messages:
seconds = time.time() - float(message.get("ts"))
if seconds < 86400: # less than 1 day
messages_in_context.append(message)
is_no_mention_required = True
else:
# In a thread with the bot in a channel
messages_in_context = client.conversations_replies(
channel=context.channel_id,
ts=thread_ts,
include_all_metadata=True,
limit=1000,
).get("messages", [])
if is_in_dm_with_bot is True:
is_no_mention_required = True
else:
the_parent_message_found = False
for message in messages_in_context:
if message.get("ts") == thread_ts:
the_parent_message_found = True
is_no_mention_required = is_no_mention_thread(context, message)
break
if the_parent_message_found is False:
parent_message = find_parent_message(
client, context.channel_id, thread_ts
)
if parent_message is not None:
is_no_mention_required = is_no_mention_thread(
context, parent_message
)
messages = []
user_id = context.actor_user_id or context.user_id
last_assistant_idx = -1
indices_to_remove = []
for idx, reply in enumerate(messages_in_context):
maybe_event_type = reply.get("metadata", {}).get("event_type")
if maybe_event_type == "chat-gpt-convo":
if context.bot_id != reply.get("bot_id"):
# Remove messages by a different app
indices_to_remove.append(idx)
continue
maybe_new_messages = (
reply.get("metadata", {}).get("event_payload", {}).get("messages")
)
if maybe_new_messages is not None:
if len(messages) == 0 or user_id is None:
new_user_id = (
reply.get("metadata", {})
.get("event_payload", {})
.get("user")
)
if new_user_id is not None:
user_id = new_user_id
messages = maybe_new_messages
last_assistant_idx = idx
if is_no_mention_required is False:
return
if is_in_dm_with_bot is False and last_assistant_idx == -1:
return
if is_in_dm_with_bot is True:
# To know whether this app needs to start a new convo
if not next(filter(lambda msg: msg["role"] == "system", messages), None):
# Replace placeholder for Slack user ID in the system prompt
system_text = build_system_text(
SYSTEM_TEXT, TRANSLATE_MARKDOWN, context
)
messages.insert(0, {"role": "system", "content": system_text})
filtered_messages_in_context = []
for idx, reply in enumerate(messages_in_context):
# Strip bot Slack user ID from initial message
if idx == 0:
reply["text"] = re.sub(
f"<@{context.bot_user_id}>\\s*", "", reply["text"]
)
if idx not in indices_to_remove:
filtered_messages_in_context.append(reply)
if len(filtered_messages_in_context) == 0:
return
for reply in filtered_messages_in_context:
msg_user_id = reply.get("user")
messages.append(
{
"content": format_openai_message_content(
reply.get("text"), TRANSLATE_MARKDOWN
),
"role": "user",
}
)
loading_text = translate(
openai_api_key=openai_api_key, context=context, text=DEFAULT_LOADING_TEXT
)
wip_reply = post_wip_message(
client=client,
channel=context.channel_id,
thread_ts=payload.get("thread_ts") if is_in_dm_with_bot else payload["ts"],
loading_text=loading_text,
messages=messages,
user=user_id,
)
latest_replies = client.conversations_replies(
channel=context.channel_id,
ts=wip_reply.get("ts"),
include_all_metadata=True,
limit=1000,
)
if latest_replies.get("messages", [])[-1]["ts"] != wip_reply["message"]["ts"]:
# Since a new reply will come soon, this app abandons this reply
client.chat_delete(
channel=context.channel_id,
ts=wip_reply["message"]["ts"],
)
return
resp = ask_llm(messages=messages)
print("Reply "+resp)
update_wip_message(
client=client,
channel=context.channel_id,
ts=wip_reply["message"]["ts"],
text=resp,
messages=messages,
user=user_id,
)
except Timeout:
if wip_reply is not None:
text = (
(
wip_reply.get("message", {}).get("text", "")
if wip_reply is not None
else ""
)
+ "\n\n"
+ translate(
openai_api_key=openai_api_key,
context=context,
text=TIMEOUT_ERROR_MESSAGE,
)
)
client.chat_update(
channel=context.channel_id,
ts=wip_reply["message"]["ts"],
text=text,
)
except Exception as e:
text = (
(
wip_reply.get("message", {}).get("text", "")
if wip_reply is not None
else ""
)
+ "\n\n"
+ f":warning: Failed to reply: {e}"
)
        logger.exception(text)
if wip_reply is not None:
client.chat_update(
channel=context.channel_id,
ts=wip_reply["message"]["ts"],
text=text,
)
def register_listeners(app: App):
app.event("app_mention")(ack=just_ack, lazy=[respond_to_app_mention])
# app.event("message")(ack=just_ack, lazy=[respond_to_new_message])
MESSAGE_SUBTYPES_TO_SKIP = ["message_changed", "message_deleted"]
# To reduce unnecessary workload in this app,
# this before_authorize function skips message changed/deleted events.
# Especially, "message_changed" events can be triggered many times when the app rapidly updates its reply.
def before_authorize(
body: dict,
payload: dict,
logger: logging.Logger,
next_,
):
if (
is_event(body)
and payload.get("type") == "message"
and payload.get("subtype") in MESSAGE_SUBTYPES_TO_SKIP
):
logger.debug(
"Skipped the following middleware and listeners "
f"for this message event (subtype: {payload.get('subtype')})"
)
return BoltResponse(status=200, body="")
next_()
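# Minimal wiring sketch (illustrative only; the Socket Mode entrypoint lives
# outside this file, and the token variable names below are the conventional
# ones, not confirmed by this diff):
#
#   import os
#   from slack_bolt import App
#   from slack_bolt.adapter.socket_mode import SocketModeHandler
#
#   app = App(token=os.environ["SLACK_BOT_TOKEN"], before_authorize=before_authorize)
#   register_listeners(app)
#   SocketModeHandler(app, os.environ["SLACK_APP_TOKEN"]).start()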

examples/slack/app/env.py Normal file

@@ -0,0 +1,43 @@
import os
DEFAULT_SYSTEM_TEXT = """
"""
SYSTEM_TEXT = os.environ.get("OPENAI_SYSTEM_TEXT", DEFAULT_SYSTEM_TEXT)
DEFAULT_OPENAI_TIMEOUT_SECONDS = 30
OPENAI_TIMEOUT_SECONDS = int(
os.environ.get("OPENAI_TIMEOUT_SECONDS", DEFAULT_OPENAI_TIMEOUT_SECONDS)
)
DEFAULT_OPENAI_MODEL = "gpt-3.5-turbo"
OPENAI_MODEL = os.environ.get("OPENAI_MODEL", DEFAULT_OPENAI_MODEL)
USE_SLACK_LANGUAGE = os.environ.get("USE_SLACK_LANGUAGE", "true") == "true"
SLACK_APP_LOG_LEVEL = os.environ.get("SLACK_APP_LOG_LEVEL", "DEBUG")
TRANSLATE_MARKDOWN = os.environ.get("TRANSLATE_MARKDOWN", "false") == "true"
BASE_PATH = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
EMBEDDINGS_MODEL = os.environ.get('EMBEDDINGS_MODEL', "all-MiniLM-L6-v2")
EMBEDDINGS_API_BASE = os.environ.get("EMBEDDINGS_API_BASE", BASE_PATH)
LOCALAI_API_BASE = os.environ.get("LOCALAI_API_BASE", BASE_PATH)
TTS_API_BASE = os.environ.get("TTS_API_BASE", BASE_PATH)
IMAGE_API_BASE = os.environ.get("IMAGES_API_BASE", BASE_PATH)
STABLEDIFFUSION_MODEL = os.environ.get("STABLEDIFFUSION_MODEL", "dreamshaper")
FUNCTIONS_MODEL = os.environ.get("FUNCTIONS_MODEL", OPENAI_MODEL)
LLM_MODEL = os.environ.get("LLM_MODEL", OPENAI_MODEL)
VOICE_MODEL = os.environ.get("TTS_MODEL", "en-us-kathleen-low.onnx")
PERSISTENT_DIR = os.environ.get("PERSISTENT_DIR", "/data")
MILVUS_HOST = os.environ.get("MILVUS_HOST", "")
MILVUS_PORT = os.environ.get("MILVUS_PORT", 0)
MEMORY_COLLECTION = os.environ.get("MEMORY_COLLECTION", "local")
MEMORY_CHUNK_SIZE = int(os.environ.get("MEMORY_CHUNK_SIZE", 600))
MEMORY_CHUNK_OVERLAP = int(os.environ.get("MEMORY_CHUNK_OVERLAP", 110))
MEMORY_RESULTS = int(os.environ.get("MEMORY_RESULTS", 3))
MEMORY_SEARCH_TYPE = os.environ.get("MEMORY_SEARCH_TYPE", "mmr")
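# Example overrides (shell, illustrative values for a local LocalAI instance):
#   export OPENAI_API_BASE=http://localhost:8080/v1
#   export MEMORY_CHUNK_SIZE=1000
#   export MEMORY_SEARCH_TYPE=similarity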

examples/slack/app/i18n.py Normal file

@@ -0,0 +1,75 @@
from typing import Optional
import openai
from slack_bolt import BoltContext
from .openai_ops import GPT_3_5_TURBO_0301_MODEL
# All the supported languages for Slack app as of March 2023
_locale_to_lang = {
"en-US": "English",
"en-GB": "English",
"de-DE": "German",
"es-ES": "Spanish",
"es-LA": "Spanish",
"fr-FR": "French",
"it-IT": "Italian",
"pt-BR": "Portuguese",
"ru-RU": "Russian",
"ja-JP": "Japanese",
"zh-CN": "Chinese",
"zh-TW": "Chinese",
"ko-KR": "Korean",
}
def from_locale_to_lang(locale: Optional[str]) -> Optional[str]:
if locale is None:
return None
return _locale_to_lang.get(locale)
_translation_result_cache = {}
def translate(*, openai_api_key: str, context: BoltContext, text: str) -> str:
lang = from_locale_to_lang(context.get("locale"))
if lang is None or lang == "English":
return text
cached_result = _translation_result_cache.get(f"{lang}:{text}")
if cached_result is not None:
return cached_result
response = openai.ChatCompletion.create(
api_key=openai_api_key,
model=GPT_3_5_TURBO_0301_MODEL,
messages=[
{
"role": "system",
"content": "You're the AI model that primarily focuses on the quality of language translation. "
"You must not change the meaning of sentences when translating them into a different language. "
"You must provide direct translation result as much as possible. "
"When the given text is a single verb/noun, its translated text must be a norm/verb form too. "
"Slack's emoji (e.g., :hourglass_flowing_sand:) and mention parts must be kept as-is. "
"Your response must not include any additional notes in English. "
"Your response must omit English version / pronunciation guide for the result. ",
},
{
"role": "user",
"content": f"Can you translate {text} into {lang} in a professional tone? "
"Please respond with the only the translated text in a format suitable for Slack user interface. "
"No need to append any English notes and guides.",
},
],
top_p=1,
n=1,
max_tokens=1024,
temperature=1,
presence_penalty=0,
frequency_penalty=0,
logit_bias={},
user="system",
)
translated_text = response["choices"][0]["message"].get("content")
_translation_result_cache[f"{lang}:{text}"] = translated_text
return translated_text
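# Illustrative: translate(openai_api_key=key, context=context, text="Hello!")
# returns the text unchanged for English (or unknown) locales; other results
# are memoized per (language, text) pair in _translation_result_cache, so a
# repeated loading message costs one API call per locale.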

examples/slack/app/markdown.py Normal file

@@ -0,0 +1,53 @@
import re
# Conversion from Slack mrkdwn to OpenAI markdown
# See also: https://api.slack.com/reference/surfaces/formatting#basics
def slack_to_markdown(content: str) -> str:
# Split the input string into parts based on code blocks and inline code
parts = re.split(r"(```.+?```|`[^`\n]+?`)", content)
# Apply the bold, italic, and strikethrough formatting to text not within code
result = ""
for part in parts:
if part.startswith("```") or part.startswith("`"):
result += part
else:
for o, n in [
(r"\*(?!\s)([^\*\n]+?)(?<!\s)\*", r"**\1**"), # *bold* to **bold**
(r"_(?!\s)([^_\n]+?)(?<!\s)_", r"*\1*"), # _italic_ to *italic*
(r"~(?!\s)([^~\n]+?)(?<!\s)~", r"~~\1~~"), # ~strike~ to ~~strike~~
]:
part = re.sub(o, n, part)
result += part
return result
# Conversion from OpenAI markdown to Slack mrkdwn
# See also: https://api.slack.com/reference/surfaces/formatting#basics
def markdown_to_slack(content: str) -> str:
# Split the input string into parts based on code blocks and inline code
parts = re.split(r"(```.+?```|`[^`\n]+?`)", content)
# Apply the bold, italic, and strikethrough formatting to text not within code
result = ""
for part in parts:
if part.startswith("```") or part.startswith("`"):
result += part
else:
for o, n in [
(
r"\*\*\*(?!\s)([^\*\n]+?)(?<!\s)\*\*\*",
r"_*\1*_",
), # ***bold italic*** to *_bold italic_*
(
r"(?<![\*_])\*(?!\s)([^\*\n]+?)(?<!\s)\*(?![\*_])",
r"_\1_",
), # *italic* to _italic_
(r"\*\*(?!\s)([^\*\n]+?)(?<!\s)\*\*", r"*\1*"), # **bold** to *bold*
(r"__(?!\s)([^_\n]+?)(?<!\s)__", r"*\1*"), # __bold__ to *bold*
(r"~~(?!\s)([^~\n]+?)(?<!\s)~~", r"~\1~"), # ~~strike~~ to ~strike~
]:
part = re.sub(o, n, part)
result += part
return result
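# Illustrative round-trips (derived from the regexes above):
#   slack_to_markdown("*bold* and _italic_")   -> "**bold** and *italic*"
#   markdown_to_slack("**bold** and *italic*") -> "*bold* and _italic_"
#   markdown_to_slack("`*kept as-is*`")        -> "`*kept as-is*`"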

examples/slack/app/openai_ops.py Normal file

@@ -0,0 +1,234 @@
import threading
import time
import re
from typing import List, Dict, Any, Generator
import openai
from openai.error import Timeout
from openai.openai_object import OpenAIObject
import tiktoken
from slack_bolt import BoltContext
from slack_sdk.web import WebClient
from app.markdown import slack_to_markdown, markdown_to_slack
from app.slack_ops import update_wip_message
from app.agent import (
localagi
)
# ----------------------------
# Internal functions
# ----------------------------
MAX_TOKENS = 1024
GPT_3_5_TURBO_0301_MODEL = "gpt-3.5-turbo-0301"
# Format message from Slack to send to OpenAI
def format_openai_message_content(content: str, translate_markdown: bool) -> str:
if content is None:
return None
# Unescape &, < and >, since Slack replaces these with their HTML equivalents
# See also: https://api.slack.com/reference/surfaces/formatting#escaping
content = content.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")
# Convert from Slack mrkdwn to markdown format
if translate_markdown:
content = slack_to_markdown(content)
return content
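# Illustrative: Slack HTML-escapes <, > and &, so
#   format_openai_message_content("&lt;code&gt; &amp; *bold*", True)
# yields "<code> & **bold**".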
def ask_llm(
*,
messages: List[Dict[str, str]],
) -> str:
# Remove old messages to make sure we have room for max_tokens
# See also: https://platform.openai.com/docs/guides/chat/introduction
    # > total tokens must be below the model's maximum limit (4096 tokens for gpt-3.5-turbo-0301)
# TODO: currently we don't pass gpt-4 to this calculation method
while calculate_num_tokens(messages) >= 4096 - MAX_TOKENS:
removed = False
for i, message in enumerate(messages):
if message["role"] in ("user", "assistant"):
del messages[i]
removed = True
break
if not removed:
# Fall through and let the OpenAI error handler deal with it
break
prompt=""
for i, message in enumerate(messages):
prompt += message["content"] + "\n"
return localagi(prompt)
def consume_openai_stream_to_write_reply(
*,
client: WebClient,
wip_reply: dict,
context: BoltContext,
user_id: str,
messages: List[Dict[str, str]],
    stream: Generator[OpenAIObject, Any, None],
timeout_seconds: int,
translate_markdown: bool,
):
start_time = time.time()
assistant_reply: Dict[str, str] = {"role": "assistant", "content": ""}
messages.append(assistant_reply)
word_count = 0
threads = []
try:
loading_character = " ... :writing_hand:"
        for chunk in stream:
spent_seconds = time.time() - start_time
if timeout_seconds < spent_seconds:
raise Timeout()
item = chunk.choices[0]
if item.get("finish_reason") is not None:
break
delta = item.get("delta")
if delta.get("content") is not None:
word_count += 1
assistant_reply["content"] += delta.get("content")
if word_count >= 20:
def update_message():
assistant_reply_text = format_assistant_reply(
assistant_reply["content"], translate_markdown
)
wip_reply["message"]["text"] = assistant_reply_text
update_wip_message(
client=client,
channel=context.channel_id,
ts=wip_reply["message"]["ts"],
text=assistant_reply_text + loading_character,
messages=messages,
user=user_id,
)
thread = threading.Thread(target=update_message)
thread.daemon = True
thread.start()
threads.append(thread)
word_count = 0
for t in threads:
try:
if t.is_alive():
t.join()
except Exception:
pass
assistant_reply_text = format_assistant_reply(
assistant_reply["content"], translate_markdown
)
wip_reply["message"]["text"] = assistant_reply_text
update_wip_message(
client=client,
channel=context.channel_id,
ts=wip_reply["message"]["ts"],
text=assistant_reply_text,
messages=messages,
user=user_id,
)
finally:
for t in threads:
try:
if t.is_alive():
t.join()
except Exception:
pass
try:
            stream.close()
except Exception:
pass
def calculate_num_tokens(
messages: List[Dict[str, str]],
# TODO: adjustment for gpt-4
model: str = GPT_3_5_TURBO_0301_MODEL,
) -> int:
"""Returns the number of tokens used by a list of messages."""
try:
encoding = tiktoken.encoding_for_model(model)
except KeyError:
encoding = tiktoken.get_encoding("cl100k_base")
if model == GPT_3_5_TURBO_0301_MODEL:
# note: future models may deviate from this
num_tokens = 0
for message in messages:
# every message follows <im_start>{role/name}\n{content}<im_end>\n
num_tokens += 4
for key, value in message.items():
num_tokens += len(encoding.encode(value))
if key == "name": # if there's a name, the role is omitted
num_tokens += -1 # role is always required and always 1 token
num_tokens += 2 # every reply is primed with <im_start>assistant
return num_tokens
else:
error = (
f"Calculating the number of tokens for for model {model} is not yet supported. "
"See https://github.com/openai/openai-python/blob/main/chatml.md "
"for information on how messages are converted to tokens."
)
raise NotImplementedError(error)
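# Illustrative: [{"role": "user", "content": "hi"}] counts roughly
# 4 (message framing) + 1 ("user") + 1 ("hi") + 2 (reply priming) = 8 tokens.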
# Format message from OpenAI to display in Slack
def format_assistant_reply(content: str, translate_markdown: bool) -> str:
for o, n in [
# Remove leading newlines
("^\n+", ""),
# Remove prepended Slack user ID
("^<@U.*?>\\s?:\\s?", ""),
# Remove OpenAI syntax tags since Slack doesn't render them in a message
("```\\s*[Rr]ust\n", "```\n"),
("```\\s*[Rr]uby\n", "```\n"),
("```\\s*[Ss]cala\n", "```\n"),
("```\\s*[Kk]otlin\n", "```\n"),
("```\\s*[Jj]ava\n", "```\n"),
("```\\s*[Gg]o\n", "```\n"),
("```\\s*[Ss]wift\n", "```\n"),
("```\\s*[Oo]objective[Cc]\n", "```\n"),
("```\\s*[Cc]\n", "```\n"),
("```\\s*[Cc][+][+]\n", "```\n"),
("```\\s*[Cc][Pp][Pp]\n", "```\n"),
("```\\s*[Cc]sharp\n", "```\n"),
("```\\s*[Mm]atlab\n", "```\n"),
("```\\s*[Jj][Ss][Oo][Nn]\n", "```\n"),
("```\\s*[Ll]a[Tt]e[Xx]\n", "```\n"),
("```\\s*bash\n", "```\n"),
("```\\s*zsh\n", "```\n"),
("```\\s*sh\n", "```\n"),
("```\\s*[Ss][Qq][Ll]\n", "```\n"),
("```\\s*[Pp][Hh][Pp]\n", "```\n"),
("```\\s*[Pp][Ee][Rr][Ll]\n", "```\n"),
("```\\s*[Jj]ava[Ss]cript", "```\n"),
("```\\s*[Ty]ype[Ss]cript", "```\n"),
("```\\s*[Pp]ython\n", "```\n"),
]:
content = re.sub(o, n, content)
# Convert from OpenAI markdown to Slack mrkdwn format
if translate_markdown:
content = markdown_to_slack(content)
return content
def build_system_text(
system_text_template: str, translate_markdown: bool, context: BoltContext
):
system_text = system_text_template.format(bot_user_id=context.bot_user_id)
# Translate format hint in system prompt
if translate_markdown is True:
system_text = slack_to_markdown(system_text)
return system_text

examples/slack/app/slack_ops.py Normal file

@@ -0,0 +1,110 @@
from typing import Optional
from typing import List, Dict
from slack_sdk.web import WebClient, SlackResponse
from slack_bolt import BoltContext
# ----------------------------
# General operations in a channel
# ----------------------------
def find_parent_message(
client: WebClient, channel_id: Optional[str], thread_ts: Optional[str]
) -> Optional[dict]:
if channel_id is None or thread_ts is None:
return None
messages = client.conversations_history(
channel=channel_id,
latest=thread_ts,
limit=1,
inclusive=1,
).get("messages", [])
return messages[0] if len(messages) > 0 else None
def is_no_mention_thread(context: BoltContext, parent_message: dict) -> bool:
parent_message_text = parent_message.get("text", "")
return f"<@{context.bot_user_id}>" in parent_message_text
# ----------------------------
# WIP reply message stuff
# ----------------------------
def post_wip_message(
*,
client: WebClient,
channel: str,
thread_ts: str,
loading_text: str,
messages: List[Dict[str, str]],
user: str,
) -> SlackResponse:
system_messages = [msg for msg in messages if msg["role"] == "system"]
return client.chat_postMessage(
channel=channel,
thread_ts=thread_ts,
text=loading_text,
metadata={
"event_type": "chat-gpt-convo",
"event_payload": {"messages": system_messages, "user": user},
},
)
def update_wip_message(
client: WebClient,
channel: str,
ts: str,
text: str,
messages: List[Dict[str, str]],
user: str,
) -> SlackResponse:
system_messages = [msg for msg in messages if msg["role"] == "system"]
return client.chat_update(
channel=channel,
ts=ts,
text=text,
metadata={
"event_type": "chat-gpt-convo",
"event_payload": {"messages": system_messages, "user": user},
},
)
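# Note: the "chat-gpt-convo" metadata attached above is what the message
# listener reads back to rebuild the system prompt and the conversation owner
# when a thread continues.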
# ----------------------------
# Home tab
# ----------------------------
DEFAULT_HOME_TAB_MESSAGE = (
"To enable this app in this Slack workspace, you need to save your OpenAI API key. "
"Visit <https://platform.openai.com/account/api-keys|your developer page> to grap your key!"
)
DEFAULT_HOME_TAB_CONFIGURE_LABEL = "Configure"
def build_home_tab(message: str, configure_label: str) -> dict:
return {
"type": "home",
"blocks": [
{
"type": "section",
"text": {
"type": "mrkdwn",
"text": message,
},
"accessory": {
"action_id": "configure",
"type": "button",
"text": {"type": "plain_text", "text": configure_label},
"style": "primary",
"value": "api_key",
},
}
],
}
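# Illustrative usage (hypothetical app_home_opened wiring):
#
#   client.views_publish(
#       user_id=event["user"],
#       view=build_home_tab(DEFAULT_HOME_TAB_MESSAGE, DEFAULT_HOME_TAB_CONFIGURE_LABEL),
#   )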