package state

import (
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"strings"
	"sync"

	"github.com/mudler/LocalAgent/pkg/xlog"

	. "github.com/mudler/LocalAgent/core/agent"

	sitemap "github.com/oxffaa/gopher-parse-sitemap"
	"jaytaylor.com/html2text"
)

// InMemoryDatabase keeps the knowledge-base entries in memory, persists them
// as JSON on disk, and mirrors every entry into the embedded RAGDB store.
type InMemoryDatabase struct {
	RAGDB
	sync.Mutex

	Database []string
	path     string
}

// loadDB reads the JSON-encoded pool file from disk.
func loadDB(path string) ([]string, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}

	poolData := []string{}
	err = json.Unmarshal(data, &poolData)

	return poolData, err
}

// NewInMemoryDB loads the pool file at poolfile if it exists (replaying its
// entries into the RAGDB store), or starts an empty database otherwise.
func NewInMemoryDB(poolfile string, store RAGDB) (*InMemoryDatabase, error) {
	// If the file does not exist, return a new, empty pool.
	if _, err := os.Stat(poolfile); err != nil {
		return &InMemoryDatabase{
			Database: []string{},
			path:     poolfile,
			RAGDB:    store,
		}, nil
	}

	// Otherwise, try to load the existing pool.
	poolData, err := loadDB(poolfile)
	if err != nil {
		return nil, err
	}

	db := &InMemoryDatabase{
		RAGDB:    store,
		Database: poolData,
		path:     poolfile,
	}

	if err := db.populateRAGDB(); err != nil {
		return nil, fmt.Errorf("error populating RAGDB: %w", err)
	}

	return db, nil
}

// Data returns the entries currently held in memory.
func (db *InMemoryDatabase) Data() []string {
	db.Lock()
	defer db.Unlock()

	return db.Database
}

// populateRAGDB replays every stored entry into the RAGDB store.
func (db *InMemoryDatabase) populateRAGDB() error {
	for _, d := range db.Database {
		if d == "" {
			// skip empty chunks
			continue
		}
		if err := db.RAGDB.Store(d); err != nil {
			return fmt.Errorf("error storing in the KB: %w", err)
		}
	}

	return nil
}

// Reset clears both the in-memory entries and the RAGDB store, then persists
// the empty database to disk.
func (db *InMemoryDatabase) Reset() error {
	db.Lock()
	db.Database = []string{}
	db.Unlock()

	if err := db.RAGDB.Reset(); err != nil {
		return err
	}

	return db.SaveDB()
}

// save writes the database to disk. Callers must hold the lock.
func (db *InMemoryDatabase) save() error {
	data, err := json.Marshal(db.Database)
	if err != nil {
		return err
	}

	return os.WriteFile(db.path, data, 0644)
}

// Store adds an entry to the database, indexes it in the RAGDB store and
// persists the database to disk.
func (db *InMemoryDatabase) Store(entry string) error {
	db.Lock()
	defer db.Unlock()

	db.Database = append(db.Database, entry)
	if err := db.RAGDB.Store(entry); err != nil {
		return err
	}

	return db.save()
}

// SaveDB persists the current database to disk.
func (db *InMemoryDatabase) SaveDB() error {
	db.Lock()
	defer db.Unlock()

	return db.save()
}

// getWebPage fetches a URL and converts the HTML body to plain text.
func getWebPage(url string) (string, error) {
	resp, err := http.Get(url)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}

	return html2text.FromString(string(body), html2text.Options{PrettyTables: true})
}

// getWebSitemap walks a sitemap and returns the text content of every page it
// lists. Pages that fail to download are skipped.
func getWebSitemap(url string) (res []string, err error) {
	err = sitemap.ParseFromSite(url, func(e sitemap.Entry) error {
		xlog.Info("Sitemap page: " + e.GetLocation())
		content, err := getWebPage(e.GetLocation())
		if err == nil {
			res = append(res, content)
		}
		return nil
	})

	return
}

// WebsiteToKB walks the sitemap of a website and stores the content of each
// page into the knowledge base, split into word-aligned chunks of up to
// chunkSize characters.
func WebsiteToKB(website string, chunkSize int, db RAGDB) {
	content, err := getWebSitemap(website)
	if err != nil {
		xlog.Info("Error walking sitemap for website", err)
	}

	xlog.Info("Found pages: ", len(content))
	xlog.Info("ChunkSize: ", chunkSize)

	StringsToKB(db, chunkSize, content...)
}

// StringsToKB splits each content string into chunks and stores them in the
// knowledge base.
func StringsToKB(db RAGDB, chunkSize int, content ...string) {
	for _, c := range content {
		chunks := splitParagraphIntoChunks(c, chunkSize)
		xlog.Info("chunks: ", len(chunks))
		for _, chunk := range chunks {
			xlog.Info("Chunk size: ", len(chunk))
			if err := db.Store(chunk); err != nil {
				xlog.Info("Error storing chunk in the KB", err)
			}
		}
	}
}
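// exampleWebsiteKnowledgeBase is an illustrative sketch of how the helpers in
// this file fit together: open (or create) the on-disk pool, mirror it into
// the RAG store, then crawl a sitemap into word-aligned chunks. The pool file
// name, sitemap URL and chunk size below are placeholder values, not defaults
// used anywhere in the package.
func exampleWebsiteKnowledgeBase(store RAGDB) error {
	db, err := NewInMemoryDB("knowledgebase.json", store)
	if err != nil {
		return err
	}

	// Fetch every page listed in the sitemap, convert it to text and store
	// it in chunks of up to 2048 characters. Passing db (rather than the raw
	// store) means each chunk is also appended to the pool and persisted.
	WebsiteToKB("https://example.com/sitemap.xml", 2048, db)

	return db.SaveDB()
}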
// splitParagraphIntoChunks takes a paragraph and a maxChunkSize and returns a
// slice of chunks that are at most maxChunkSize long, without splitting words.
// A single word longer than maxChunkSize is emitted as its own (oversized)
// chunk.
func splitParagraphIntoChunks(paragraph string, maxChunkSize int) []string {
	if len(paragraph) <= maxChunkSize {
		return []string{paragraph}
	}

	var chunks []string
	var currentChunk strings.Builder

	words := strings.Fields(paragraph) // Split the paragraph into words.

	for _, word := range words {
		// If adding the next word (plus a separating space) would exceed
		// maxChunkSize, flush currentChunk and start a new one.
		if currentChunk.Len() > 0 && currentChunk.Len()+len(word)+1 > maxChunkSize {
			chunks = append(chunks, currentChunk.String())
			currentChunk.Reset()
		} else if currentChunk.Len() == 0 && len(word) > maxChunkSize {
			// The word itself exceeds maxChunkSize: emit it as its own
			// oversized chunk rather than splitting it.
			chunks = append(chunks, word)
			continue
		}

		// Add a space before the word if it's not the beginning of a new chunk.
		if currentChunk.Len() > 0 {
			currentChunk.WriteString(" ")
		}

		// Add the word to the current chunk.
		currentChunk.WriteString(word)
	}

	// After the loop, add any remaining content in currentChunk to chunks.
	if currentChunk.Len() > 0 {
		chunks = append(chunks, currentChunk.String())
	}

	return chunks
}
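// Illustrative behavior of splitParagraphIntoChunks (example values only):
//
//	chunks := splitParagraphIntoChunks("one two three four five", 16)
//	// chunks == []string{"one two three", "four five"}
//
// "four" does not fit after "one two three" within 16 characters (13+1+4=18),
// so a new chunk is started; no word is ever split across chunks.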