Support PDF ingestion
This commit is contained in:
@@ -1,12 +1,15 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
	"bytes"
	"fmt"
	"net/http"
	"os"
	"path/filepath"

	. "github.com/mudler/local-agent-framework/agent"

	"github.com/donseba/go-htmx"
	"github.com/dslipak/pdf"
	fiber "github.com/gofiber/fiber/v2"
)
|
||||||
|
|
||||||
@@ -17,6 +20,54 @@ type (
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func (a *App) KnowledgeBaseFile(db *InMemoryDatabase) func(c *fiber.Ctx) error {
|
||||||
|
return func(c *fiber.Ctx) error {
|
||||||
|
// https://golang.withcodeexample.com/blog/file-upload-handling-golang-fiber-guide/
|
||||||
|
// Handle file upload logic
|
||||||
|
file, err := c.FormFile("file")
|
||||||
|
if err != nil {
|
||||||
|
// Handle error
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
payload := struct {
|
||||||
|
ChunkSize int `form:"chunk_size"`
|
||||||
|
}{}
|
||||||
|
|
||||||
|
if err := c.BodyParser(&payload); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
os.MkdirAll("./uploads", os.ModePerm)
|
||||||
|
|
||||||
|
destination := fmt.Sprintf("./uploads/%s", file.Filename)
|
||||||
|
if err := c.SaveFile(file, destination); err != nil {
|
||||||
|
// Handle error
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Println("File uploaded to: " + destination)
|
||||||
|
fmt.Printf("Payload: %+v\n", payload)
|
||||||
|
|
||||||
|
content, err := readPdf(destination) // Read local pdf file
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Println("Content is", content)
|
||||||
|
chunkSize := defaultChunkSize
|
||||||
|
if payload.ChunkSize > 0 {
|
||||||
|
chunkSize = payload.ChunkSize
|
||||||
|
}
|
||||||
|
|
||||||
|
go StringsToKB(db, chunkSize, content)
|
||||||
|
|
||||||
|
_, err = c.WriteString(chatDiv("File uploaded", "gray"))
|
||||||
|
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (a *App) KnowledgeBase(db *InMemoryDatabase) func(c *fiber.Ctx) error {
|
func (a *App) KnowledgeBase(db *InMemoryDatabase) func(c *fiber.Ctx) error {
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
payload := struct {
|
payload := struct {
|
||||||
@@ -153,3 +204,17 @@ func (a *App) Chat(pool *AgentPool) func(c *fiber.Ctx) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func readPdf(path string) (string, error) {
|
||||||
|
r, err := pdf.Open(path)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
var buf bytes.Buffer
|
||||||
|
b, err := r.GetPlainText()
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
buf.ReadFrom(b)
|
||||||
|
return buf.String(), nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -127,6 +127,10 @@ func WebsiteToKB(website string, chunkSize int, db *InMemoryDatabase) {
|
|||||||
fmt.Println("Found pages: ", len(content))
|
fmt.Println("Found pages: ", len(content))
|
||||||
fmt.Println("ChunkSize: ", chunkSize)
|
fmt.Println("ChunkSize: ", chunkSize)
|
||||||
|
|
||||||
|
StringsToKB(db, chunkSize, content...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func StringsToKB(db *InMemoryDatabase, chunkSize int, content ...string) {
|
||||||
for _, c := range content {
|
for _, c := range content {
|
||||||
chunks := splitParagraphIntoChunks(c, chunkSize)
|
chunks := splitParagraphIntoChunks(c, chunkSize)
|
||||||
fmt.Println("chunks: ", len(chunks))
|
fmt.Println("chunks: ", len(chunks))
|
||||||
|
|||||||
@@ -55,6 +55,7 @@ func RegisterRoutes(webapp *fiber.App, pool *AgentPool, db *InMemoryDatabase, ap
|
|||||||
webapp.Post("/create", app.Create(pool))
|
webapp.Post("/create", app.Create(pool))
|
||||||
webapp.Get("/delete/:name", app.Delete(pool))
|
webapp.Get("/delete/:name", app.Delete(pool))
|
||||||
webapp.Post("/knowledgebase", app.KnowledgeBase(db))
|
webapp.Post("/knowledgebase", app.KnowledgeBase(db))
|
||||||
|
webapp.Post("/knowledgebase/upload", app.KnowledgeBaseFile(db))
|
||||||
|
|
||||||
webapp.Get("/talk/:name", func(c *fiber.Ctx) error {
|
webapp.Get("/talk/:name", func(c *fiber.Ctx) error {
|
||||||
return c.Render("chat.html", fiber.Map{
|
return c.Render("chat.html", fiber.Map{
|
||||||
|
|||||||
@@ -29,6 +29,30 @@
|
|||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</form>
|
</form>
|
||||||
|
|
||||||
|
<!-- Upload form: posts the selected PDF and an optional chunk size to /knowledgebase/upload as multipart/form-data. -->
<form id='form' hx-encoding='multipart/form-data' hx-post='/knowledgebase/upload'>
  <div class="mb-6">
    <label for="file" class="block text-lg font-medium text-gray-400">File</label>
    <input type='file' name='file' id='file' accept="application/pdf,.pdf" required class="mt-1 focus:ring-indigo-500 focus:border-indigo-500 block w-full shadow-sm sm:text-lg border-gray-300 rounded-md bg-gray-700 text-white">
  </div>
  <div class="mb-6">
    <label for="chunk_size" class="block text-lg font-medium text-gray-400">Chunk size</label>
    <!-- The server parses chunk_size as an integer, so only accept whole positive numbers. -->
    <input type="number" min="1" step="1" name="chunk_size" id="chunk_size" class="mt-1 focus:ring-indigo-500 focus:border-indigo-500 block w-full shadow-sm sm:text-lg border-gray-300 rounded-md bg-gray-700 text-white" placeholder="380">
  </div>
  <div class="flex items-center justify-between">
    <button type="submit" class="w-full flex justify-center py-2 px-4 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-blue-500 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500">
      Upload file
    </button>
  </div>
  <div class="mb-6">
    <progress id='progress' value='0' max='100'></progress>
  </div>
</form>
<script>
  // Mirror the upload progress of the form's request into the progress bar.
  htmx.on('#form', 'htmx:xhr:progress', function(evt) {
    // Skip events without a usable total: dividing by 0 would set the value to NaN.
    if (!evt.detail.total) {
      return;
    }
    htmx.find('#progress').setAttribute('value', evt.detail.loaded / evt.detail.total * 100);
  });
</script>
|
||||||
</div>
|
</div>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|||||||
1
go.mod
1
go.mod
@@ -22,6 +22,7 @@ require (
|
|||||||
github.com/PuerkitoBio/goquery v1.6.0 // indirect
|
github.com/PuerkitoBio/goquery v1.6.0 // indirect
|
||||||
github.com/andybalholm/brotli v1.1.0 // indirect
|
github.com/andybalholm/brotli v1.1.0 // indirect
|
||||||
github.com/andybalholm/cascadia v1.1.0 // indirect
|
github.com/andybalholm/cascadia v1.1.0 // indirect
|
||||||
|
github.com/dslipak/pdf v0.0.2 // indirect
|
||||||
github.com/go-logr/logr v1.3.0 // indirect
|
github.com/go-logr/logr v1.3.0 // indirect
|
||||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
|
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
|
||||||
github.com/gofiber/template v1.8.3 // indirect
|
github.com/gofiber/template v1.8.3 // indirect
|
||||||
|
|||||||
2
go.sum
2
go.sum
@@ -14,6 +14,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
|
|||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/donseba/go-htmx v1.8.0 h1:oTx1uUsjXZZVvcZfulZvBSPtdD1jzsvZyuK91+Q8zPE=
|
github.com/donseba/go-htmx v1.8.0 h1:oTx1uUsjXZZVvcZfulZvBSPtdD1jzsvZyuK91+Q8zPE=
|
||||||
github.com/donseba/go-htmx v1.8.0/go.mod h1:8PTAYvNKf8+QYis+DpAsggKz+sa2qljtMgvdAeNBh5s=
|
github.com/donseba/go-htmx v1.8.0/go.mod h1:8PTAYvNKf8+QYis+DpAsggKz+sa2qljtMgvdAeNBh5s=
|
||||||
|
github.com/dslipak/pdf v0.0.2 h1:djAvcM5neg9Ush+zR6QXB+VMJzR6TdnX766HPIg1JmI=
|
||||||
|
github.com/dslipak/pdf v0.0.2/go.mod h1:2L3SnkI9cQwnAS9gfPz2iUoLC0rUZwbucpbKi5R1mUo=
|
||||||
github.com/go-logr/logr v1.3.0 h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY=
|
github.com/go-logr/logr v1.3.0 h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY=
|
||||||
github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
||||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
|
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
|
||||||
|
|||||||
Reference in New Issue
Block a user