diff --git a/example/webui/agentpool.go b/example/webui/agentpool.go index f307a82..3005e8d 100644 --- a/example/webui/agentpool.go +++ b/example/webui/agentpool.go @@ -37,6 +37,8 @@ type AgentConfig struct { IdentityGuidance string `json:"identity_guidance" form:"identity_guidance"` PeriodicRuns string `json:"periodic_runs" form:"periodic_runs"` PermanentGoal string `json:"permanent_goal" form:"permanent_goal"` + EnableKnowledgeBase bool `json:"enable_kb" form:"enable_kb"` + KnowledgeBaseResults int `json:"kb_results" form:"kb_results"` } type AgentPool struct { @@ -286,6 +288,14 @@ func (a *AgentPool) startAgentWithConfig(name string, config *AgentConfig) error } } + if config.EnableKnowledgeBase { + opts = append(opts, EnableKnowledgeBase) + } + + if config.KnowledgeBaseResults > 0 { + opts = append(opts, EnableKnowledgeBaseWithResults(config.KnowledgeBaseResults)) + } + fmt.Println("Starting agent", name) fmt.Printf("Config %+v\n", config) agent, err := New(opts...) diff --git a/example/webui/create.html b/example/webui/create.html index 95246a3..511725e 100644 --- a/example/webui/create.html +++ b/example/webui/create.html @@ -84,7 +84,13 @@
- + + +
+ + +
+ diff --git a/example/webui/knowledgebase.html b/example/webui/knowledgebase.html new file mode 100644 index 0000000..33fd82c --- /dev/null +++ b/example/webui/knowledgebase.html @@ -0,0 +1,31 @@ + + + + + KnowledgeBase + + + + + + +
+

Knowledgebase (items: {{.KnowledgebaseItemsCount}})

+ +
+ Add sites to KB +
+ + +
+ + +
+ +
+
+
+ + diff --git a/example/webui/main.go b/example/webui/main.go index af7d9b4..42a7fd2 100644 --- a/example/webui/main.go +++ b/example/webui/main.go @@ -23,6 +23,7 @@ type ( var testModel = os.Getenv("TEST_MODEL") var apiURL = os.Getenv("API_URL") +var apiKey = os.Getenv("API_KEY") func init() { if testModel == "" { @@ -45,13 +46,25 @@ func main() { if err != nil { panic(err) } - os.MkdirAll(cwd+"/pool", 0755) - pool, err := NewAgentPool(testModel, apiURL, cwd+"/pool") + stateDir := cwd + "/pool" + os.MkdirAll(stateDir, 0755) + + pool, err := NewAgentPool(testModel, apiURL, stateDir) if err != nil { panic(err) - } + + db, err := NewInMemoryDB(stateDir) + if err != nil { + panic(err) + } + + // Reload store + // if err := db.SaveToStore(apiKey, apiURL); err != nil { + // fmt.Println("Error storing in the KB", err) + // } + app := &App{ htmx: htmx.New(), pool: pool, @@ -61,19 +74,6 @@ func main() { panic(err) } - // go func() { - // for { - // clientsStr := "" - // clients := sseManager.Clients() - // for _, c := range clients { - // clientsStr += c + ", " - // } - - // time.Sleep(1 * time.Second) // Send a message every seconds - // sseManager.Send(NewMessage(fmt.Sprintf("connected clients: %v", clientsStr)).WithEvent("clients")) - // } - // }() - // Initialize a new Fiber app webapp := fiber.New() @@ -94,6 +94,13 @@ func main() { }) }) + webapp.Get("/knowledgebase", func(c *fiber.Ctx) error { + return c.Render("knowledgebase.html", fiber.Map{ + "Title": "Hello, World!", + "KnowledgebaseItemsCount": len(db.Database), + }) + }) + // Define a route for the GET method on the root path '/' webapp.Get("/sse/:name", func(c *fiber.Ctx) error { @@ -110,6 +117,7 @@ func main() { webapp.Post("/chat/:name", app.Chat(pool)) webapp.Post("/create", app.Create(pool)) webapp.Get("/delete/:name", app.Delete(pool)) + webapp.Post("/knowledgebase", app.KnowledgeBase(db)) webapp.Get("/talk/:name", func(c *fiber.Ctx) error { return c.Render("chat.html", fiber.Map{ @@ -119,29 +127,48 @@ func main() { }) log.Fatal(webapp.Listen(":3000")) - - // mux := http.NewServeMux() - - // mux.Handle("GET /", http.HandlerFunc(app.Home(agent))) - - // // External notifications (e.g. webhook) - // mux.Handle("POST /notify", http.HandlerFunc(app.Notify)) - - // // User chat - // mux.Handle("POST /chat", http.HandlerFunc(app.Chat(sseManager))) - - // // Server Sent Events - // //mux.Handle("GET /sse", http.HandlerFunc(app.SSE)) - - // fmt.Print("Server started at http://localhost:3210") - // err = http.ListenAndServe(":3210", mux) - // log.Fatal(err) } -// func (a *App) SSE(w http.ResponseWriter, r *http.Request) { -// cl := sse.NewClient(randStringRunes(10)) -// sseManager.Handle(w, r, cl) -// } +func (a *App) KnowledgeBase(db *InMemoryDatabase) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + payload := struct { + URL string `json:"url"` + }{} + + if err := c.BodyParser(&payload); err != nil { + return err + } + + website := payload.URL + if website == "" { + return fmt.Errorf("please enter a URL") + } + + go func() { + content, err := Sitemap(website) + if err != nil { + fmt.Println("Error walking sitemap for website", err) + } + fmt.Println("Found pages: ", len(content)) + + for _, c := range content { + chunks := splitParagraphIntoChunks(c, 256) + fmt.Println("chunks: ", len(chunks)) + for _, chunk := range chunks { + db.AddEntry(chunk) + } + + db.SaveDB() + } + + if err := db.SaveToStore(apiKey, apiURL); err != nil { + fmt.Println("Error storing in the KB", err) + } + }() + + return c.Redirect("/knowledgebase") + } +} func (a *App) Notify(pool *AgentPool) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { diff --git a/example/webui/rag.go b/example/webui/rag.go new file mode 100644 index 0000000..ace8345 --- /dev/null +++ b/example/webui/rag.go @@ -0,0 +1,156 @@ +package main + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "strings" + "sync" + + "jaytaylor.com/html2text" + + "github.com/mudler/local-agent-framework/llm" + sitemap "github.com/oxffaa/gopher-parse-sitemap" +) + +type InMemoryDatabase struct { + sync.Mutex + Database []string + path string +} + +func loadDB(path string) ([]string, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, err + } + + poolData := []string{} + err = json.Unmarshal(data, &poolData) + return poolData, err +} + +func NewInMemoryDB(knowledgebase string) (*InMemoryDatabase, error) { + // if file exists, try to load an existing pool. + // if file does not exist, create a new pool. + + poolfile := filepath.Join(knowledgebase, "knowledgebase.json") + + if _, err := os.Stat(poolfile); err != nil { + // file does not exist, return a new pool + return &InMemoryDatabase{ + Database: []string{}, + path: poolfile, + }, nil + } + + poolData, err := loadDB(poolfile) + if err != nil { + return nil, err + } + return &InMemoryDatabase{ + Database: poolData, + path: poolfile, + }, nil +} + +func (db *InMemoryDatabase) SaveToStore(apiKey string, apiURL string) error { + for _, d := range db.Database { + lai := llm.NewClient(apiKey, apiURL+"/v1") + laiStore := llm.NewStoreClient(apiURL, apiKey) + + err := llm.StoreStringEmbeddingInVectorDB(laiStore, lai, d) + if err != nil { + return fmt.Errorf("Error storing in the KB: %w", err) + } + } + + return nil +} +func (db *InMemoryDatabase) AddEntry(entry string) error { + db.Lock() + defer db.Unlock() + db.Database = append(db.Database, entry) + return nil +} + +func (db *InMemoryDatabase) SaveDB() error { + db.Lock() + defer db.Unlock() + data, err := json.Marshal(db.Database) + if err != nil { + return err + } + + err = os.WriteFile(db.path, data, 0644) + return err +} + +func getWebPage(url string) (string, error) { + resp, err := http.Get(url) + if err != nil { + return "", err + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } + return html2text.FromString(string(body), html2text.Options{PrettyTables: true}) +} + +func Sitemap(url string) (res []string, err error) { + err = sitemap.ParseFromSite(url, func(e sitemap.Entry) error { + fmt.Println("Sitemap page: " + e.GetLocation()) + content, err := getWebPage(e.GetLocation()) + if err == nil { + res = append(res, content) + } + return nil + }) + return +} + +// splitParagraphIntoChunks takes a paragraph and a maxChunkSize as input, +// and returns a slice of strings where each string is a chunk of the paragraph +// that is at most maxChunkSize long, ensuring that words are not split. +func splitParagraphIntoChunks(paragraph string, maxChunkSize int) []string { + // Check if the paragraph length is less than or equal to maxChunkSize. + // If so, return the paragraph as the only chunk. + if len(paragraph) <= maxChunkSize { + return []string{paragraph} + } + + var chunks []string + var currentChunk strings.Builder + + words := strings.Fields(paragraph) // Splits the paragraph into words. + + for _, word := range words { + // Check if adding the next word would exceed the maxChunkSize. + // If so, add the currentChunk to the chunks slice and start a new chunk. + if currentChunk.Len()+len(word) > maxChunkSize { + chunks = append(chunks, currentChunk.String()) + currentChunk.Reset() + } + + // Add a space before the word if it's not the beginning of a new chunk. + if currentChunk.Len() > 0 { + currentChunk.WriteString(" ") + } + + // Add the word to the current chunk. + currentChunk.WriteString(word) + } + + // Add the last chunk if it's not empty. + if currentChunk.Len() > 0 { + chunks = append(chunks, currentChunk.String()) + } + + return chunks +} diff --git a/go.mod b/go.mod index 4ea46cc..a729aa0 100644 --- a/go.mod +++ b/go.mod @@ -33,7 +33,10 @@ require ( github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-runewidth v0.0.15 // indirect + github.com/olekukonko/tablewriter v0.0.5 // indirect + github.com/oxffaa/gopher-parse-sitemap v0.0.0-20191021113419-005d2eb1def4 // indirect github.com/rivo/uniseg v0.2.0 // indirect + github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf // indirect github.com/stretchr/testify v1.9.0 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/tcplisten v1.0.0 // indirect @@ -43,4 +46,5 @@ require ( golang.org/x/text v0.14.0 // indirect golang.org/x/tools v0.16.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect + jaytaylor.com/html2text v0.0.0-20230321000545-74c2419ad056 // indirect ) diff --git a/go.sum b/go.sum index 2c4e43c..c39285b 100644 --- a/go.sum +++ b/go.sum @@ -48,12 +48,17 @@ github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovk github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= +github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= github.com/onsi/ginkgo/v2 v2.15.0 h1:79HwNRBAZHOEwrczrgSOPy+eFTTlIGELKy5as+ClttY= github.com/onsi/ginkgo/v2 v2.15.0/go.mod h1:HlxMHtYF57y6Dpf+mc5529KKmSq9h2FpCF+/ZkwUxKM= github.com/onsi/gomega v1.31.1 h1:KYppCUK+bUgAZwHOu7EXVBKyQA6ILvOESHkn/tgoqvo= github.com/onsi/gomega v1.31.1/go.mod h1:y40C95dwAD1Nz36SsEnxvfFe8FFfNxzI5eJ0EYGyAy0= +github.com/oxffaa/gopher-parse-sitemap v0.0.0-20191021113419-005d2eb1def4 h1:2vmb32OdDhjZf2ETGDlr9n8RYXx7c+jXPxMiPbwnA+8= +github.com/oxffaa/gopher-parse-sitemap v0.0.0-20191021113419-005d2eb1def4/go.mod h1:2JQx4jDHmWrbABvpOayg/+OTU6ehN0IyK2EHzceXpJo= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= @@ -64,6 +69,8 @@ github.com/sashabaranov/go-openai v1.18.3 h1:dspFGkmZbhjg1059KhqLYSV2GaCiRIn+bOu github.com/sashabaranov/go-openai v1.18.3/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= github.com/slack-go/slack v0.12.5 h1:ddZ6uz6XVaB+3MTDhoW04gG+Vc/M/X1ctC+wssy2cqs= github.com/slack-go/slack v0.12.5/go.mod h1:hlGi5oXA+Gt+yWTPP0plCdRKmjsDxecdHxYQdlMQKOw= +github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf h1:pvbZ0lM0XWPBqUKqFU8cmavspvIl9nulOYwdy6IFRRo= +github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf/go.mod h1:RJID2RhlZKId02nZ62WenDCkgHFerpIOmW0iT7GKmXM= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= @@ -107,3 +114,5 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +jaytaylor.com/html2text v0.0.0-20230321000545-74c2419ad056 h1:6YFJoB+0fUH6X3xU/G2tQqCYg+PkGtnZ5nMR5rpw72g= +jaytaylor.com/html2text v0.0.0-20230321000545-74c2419ad056/go.mod h1:OxvTsCwKosqQ1q7B+8FwXqg4rKZ/UG9dUW+g/VL2xH4=