Support PDF ingestion

This commit is contained in:
Ettore Di Giacinto
2024-04-11 00:40:46 +02:00
parent cb35f871db
commit d237e17719
6 changed files with 97 additions and 0 deletions

View File

@@ -127,6 +127,10 @@ func WebsiteToKB(website string, chunkSize int, db *InMemoryDatabase) {
fmt.Println("Found pages: ", len(content))
fmt.Println("ChunkSize: ", chunkSize)
StringsToKB(db, chunkSize, content...)
}
func StringsToKB(db *InMemoryDatabase, chunkSize int, content ...string) {
for _, c := range content {
chunks := splitParagraphIntoChunks(c, chunkSize)
fmt.Println("chunks: ", len(chunks))