Support PDF ingestion
This commit is contained in:
@@ -1,12 +1,15 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
|
||||
. "github.com/mudler/local-agent-framework/agent"
|
||||
|
||||
"github.com/donseba/go-htmx"
|
||||
"github.com/dslipak/pdf"
|
||||
fiber "github.com/gofiber/fiber/v2"
|
||||
)
|
||||
|
||||
@@ -17,6 +20,54 @@ type (
|
||||
}
|
||||
)
|
||||
|
||||
func (a *App) KnowledgeBaseFile(db *InMemoryDatabase) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
// https://golang.withcodeexample.com/blog/file-upload-handling-golang-fiber-guide/
|
||||
// Handle file upload logic
|
||||
file, err := c.FormFile("file")
|
||||
if err != nil {
|
||||
// Handle error
|
||||
return err
|
||||
}
|
||||
|
||||
payload := struct {
|
||||
ChunkSize int `form:"chunk_size"`
|
||||
}{}
|
||||
|
||||
if err := c.BodyParser(&payload); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
os.MkdirAll("./uploads", os.ModePerm)
|
||||
|
||||
destination := fmt.Sprintf("./uploads/%s", file.Filename)
|
||||
if err := c.SaveFile(file, destination); err != nil {
|
||||
// Handle error
|
||||
return err
|
||||
}
|
||||
|
||||
fmt.Println("File uploaded to: " + destination)
|
||||
fmt.Printf("Payload: %+v\n", payload)
|
||||
|
||||
content, err := readPdf(destination) // Read local pdf file
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
fmt.Println("Content is", content)
|
||||
chunkSize := defaultChunkSize
|
||||
if payload.ChunkSize > 0 {
|
||||
chunkSize = payload.ChunkSize
|
||||
}
|
||||
|
||||
go StringsToKB(db, chunkSize, content)
|
||||
|
||||
_, err = c.WriteString(chatDiv("File uploaded", "gray"))
|
||||
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
func (a *App) KnowledgeBase(db *InMemoryDatabase) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
payload := struct {
|
||||
@@ -153,3 +204,17 @@ func (a *App) Chat(pool *AgentPool) func(c *fiber.Ctx) error {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func readPdf(path string) (string, error) {
|
||||
r, err := pdf.Open(path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
b, err := r.GetPlainText()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
buf.ReadFrom(b)
|
||||
return buf.String(), nil
|
||||
}
|
||||
|
||||
@@ -127,6 +127,10 @@ func WebsiteToKB(website string, chunkSize int, db *InMemoryDatabase) {
|
||||
fmt.Println("Found pages: ", len(content))
|
||||
fmt.Println("ChunkSize: ", chunkSize)
|
||||
|
||||
StringsToKB(db, chunkSize, content...)
|
||||
}
|
||||
|
||||
func StringsToKB(db *InMemoryDatabase, chunkSize int, content ...string) {
|
||||
for _, c := range content {
|
||||
chunks := splitParagraphIntoChunks(c, chunkSize)
|
||||
fmt.Println("chunks: ", len(chunks))
|
||||
|
||||
@@ -55,6 +55,7 @@ func RegisterRoutes(webapp *fiber.App, pool *AgentPool, db *InMemoryDatabase, ap
|
||||
webapp.Post("/create", app.Create(pool))
|
||||
webapp.Get("/delete/:name", app.Delete(pool))
|
||||
webapp.Post("/knowledgebase", app.KnowledgeBase(db))
|
||||
webapp.Post("/knowledgebase/upload", app.KnowledgeBaseFile(db))
|
||||
|
||||
webapp.Get("/talk/:name", func(c *fiber.Ctx) error {
|
||||
return c.Render("chat.html", fiber.Map{
|
||||
|
||||
@@ -29,6 +29,30 @@
|
||||
</button>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
<form id='form' hx-encoding='multipart/form-data' hx-post='/knowledgebase/upload'>
|
||||
<div class="mb-6">
|
||||
<label for="file" class="block text-lg font-medium text-gray-400">File</label>
|
||||
<input type='file' name='file' id='file' class="mt-1 focus:ring-indigo-500 focus:border-indigo-500 block w-full shadow-sm sm:text-lg border-gray-300 rounded-md bg-gray-700 text-white">
|
||||
</div>
|
||||
<div class="mb-6">
|
||||
<label for="chunk_size" class="block text-lg font-medium text-gray-400">Chunk size</label>
|
||||
<input type="text" name="chunk_size" id="chunk_size" class="mt-1 focus:ring-indigo-500 focus:border-indigo-500 block w-full shadow-sm sm:text-lg border-gray-300 rounded-md bg-gray-700 text-white" placeholder="380">
|
||||
</div>
|
||||
<div class="flex items-center justify-between">
|
||||
<button type="submit" class="w-full flex justify-center py-2 px-4 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-blue-500 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500">
|
||||
Upload file
|
||||
</button>
|
||||
</div>
|
||||
<div class="mb-6">
|
||||
<progress id='progress' value='0' max='100'></progress>
|
||||
</div>
|
||||
</form>
|
||||
<script>
  // Mirror the upload progress events of the #form element onto the
  // #progress bar as a percentage.
  htmx.on('#form', 'htmx:xhr:progress', function(evt) {
    // A zero or missing total would put NaN/Infinity into the value
    // attribute, so leave the bar untouched for such events.
    if (!evt.detail.total) {
      return;
    }
    htmx.find('#progress').setAttribute('value', evt.detail.loaded / evt.detail.total * 100)
  });
</script>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
1
go.mod
1
go.mod
@@ -22,6 +22,7 @@ require (
|
||||
github.com/PuerkitoBio/goquery v1.6.0 // indirect
|
||||
github.com/andybalholm/brotli v1.1.0 // indirect
|
||||
github.com/andybalholm/cascadia v1.1.0 // indirect
|
||||
github.com/dslipak/pdf v0.0.2 // indirect
|
||||
github.com/go-logr/logr v1.3.0 // indirect
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
|
||||
github.com/gofiber/template v1.8.3 // indirect
|
||||
|
||||
2
go.sum
2
go.sum
@@ -14,6 +14,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/donseba/go-htmx v1.8.0 h1:oTx1uUsjXZZVvcZfulZvBSPtdD1jzsvZyuK91+Q8zPE=
|
||||
github.com/donseba/go-htmx v1.8.0/go.mod h1:8PTAYvNKf8+QYis+DpAsggKz+sa2qljtMgvdAeNBh5s=
|
||||
github.com/dslipak/pdf v0.0.2 h1:djAvcM5neg9Ush+zR6QXB+VMJzR6TdnX766HPIg1JmI=
|
||||
github.com/dslipak/pdf v0.0.2/go.mod h1:2L3SnkI9cQwnAS9gfPz2iUoLC0rUZwbucpbKi5R1mUo=
|
||||
github.com/go-logr/logr v1.3.0 h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY=
|
||||
github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
|
||||
|
||||
Reference in New Issue
Block a user