diff --git a/pkg/xstrings/split.go b/pkg/xstrings/split.go new file mode 100644 index 0000000..8bfcfff --- /dev/null +++ b/pkg/xstrings/split.go @@ -0,0 +1,72 @@ +package xstrings + +import ( + "strings" +) + +// SplitParagraph splits text into chunks of about maxLength bytes, breaking +// only at whitespace (space, tab, newline, carriage return) so words stay whole. +// A chunk can exceed maxLength when a word is longer than the limit or a newline is kept at the split. +func SplitParagraph(text string, maxLength int) []string { + // A non-positive limit or empty input is returned unchanged as a single chunk + if maxLength <= 0 || len(text) == 0 { + return []string{text} + } + + var chunks []string + remainingText := text + + for len(remainingText) > 0 { + // If remaining text fits in a chunk, add it and we're done + if len(remainingText) <= maxLength { + chunks = append(chunks, remainingText) + break + } + + // Try to find a good split point near the max length + splitIndex := maxLength + + // Look backward from the max length to find a space or newline + for splitIndex > 0 && !isWhitespace(rune(remainingText[splitIndex])) { + splitIndex-- + } + + // If we couldn't find a good split point (no whitespace), + // look forward for the next whitespace + if splitIndex == 0 { + splitIndex = maxLength + // Scan forward so the oversized word is kept whole rather than split + for splitIndex < len(remainingText) && !isWhitespace(rune(remainingText[splitIndex])) { + splitIndex++ + } + + // If we still couldn't find whitespace, take the whole string + if splitIndex == len(remainingText) { + chunks = append(chunks, remainingText) + break + } + } + + // Add the chunk up to the split point + chunk := remainingText[:splitIndex] + + // Keep a newline found at the split point with the current chunk + if splitIndex < len(remainingText) && remainingText[splitIndex] == '\n' { + chunk += string(remainingText[splitIndex]) + splitIndex++ + } + + chunks = append(chunks, chunk) + + // Remove leading whitespace from the next chunk + remainingText = remainingText[splitIndex:] + remainingText = 
strings.TrimLeftFunc(remainingText, isWhitespace) + } + + return chunks +} + +// isWhitespace reports whether r is a space, tab, newline, or carriage return +func isWhitespace(r rune) bool { + return r == ' ' || r == '\t' || r == '\n' || r == '\r' +} diff --git a/pkg/xstrings/split_test.go b/pkg/xstrings/split_test.go new file mode 100644 index 0000000..d0164b8 --- /dev/null +++ b/pkg/xstrings/split_test.go @@ -0,0 +1,79 @@ +package xstrings_test + +import ( + xtrings "github.com/mudler/LocalAgent/pkg/xstrings" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("SplitParagraph", func() { + It("should return the text as a single chunk if it's shorter than maxLen", func() { + text := "Short text" + maxLen := 20 + result := xtrings.SplitParagraph(text, maxLen) + Expect(result).To(Equal([]string{"Short text"})) + }) + + It("should split the text into chunks of maxLen without truncating words", func() { + text := "This is a longer text that needs to be split into chunks." + maxLen := 10 + result := xtrings.SplitParagraph(text, maxLen) + Expect(result).To(Equal([]string{"This is a", "longer", "text that", "needs to", "be split", "into", "chunks."})) + }) + + It("should handle texts with multiple spaces and newlines correctly", func() { + text := "This is\na\ntext with\n\nmultiple spaces and\nnewlines." 
+ maxLen := 10 + result := xtrings.SplitParagraph(text, maxLen) + Expect(result).To(Equal([]string{"This is\na\n", "text with\n", "multiple", "spaces ", "and\n", "newlines."})) + }) + + It("should handle a text with a single word longer than maxLen", func() { + text := "supercalifragilisticexpialidocious" + maxLen := 10 + result := xtrings.SplitParagraph(text, maxLen) + Expect(result).To(Equal([]string{"supercalifragilisticexpialidocious"})) + }) + + It("should handle a text with empty lines", func() { + text := "line1\n\nline2" + maxLen := 10 + result := xtrings.SplitParagraph(text, maxLen) + Expect(result).To(Equal([]string{"line1\n\n", "line2"})) + }) + + It("should handle a text with leading and trailing spaces", func() { + text := " leading spaces and trailing spaces " + maxLen := 15 + result := xtrings.SplitParagraph(text, maxLen) + Expect(result).To(Equal([]string{" leading", "spaces and", "trailing spaces"})) + }) + + It("should handle a text with only spaces", func() { + text := " " + maxLen := 10 + result := xtrings.SplitParagraph(text, maxLen) + Expect(result).To(Equal([]string{" "})) + }) + + It("should handle empty string", func() { + text := "" + maxLen := 10 + result := xtrings.SplitParagraph(text, maxLen) + Expect(result).To(Equal([]string{""})) + }) + + It("should handle a text with only newlines", func() { + text := "\n\n\n" + maxLen := 10 + result := xtrings.SplitParagraph(text, maxLen) + Expect(result).To(Equal([]string{"\n\n\n"})) + }) + + It("should handle a text with special characters", func() { + text := "This is a text with special characters !@#$%^&*()" + maxLen := 20 + result := xtrings.SplitParagraph(text, maxLen) + Expect(result).To(Equal([]string{"This is a text with", "special characters", "!@#$%^&*()"})) + }) +}) diff --git a/pkg/xstrings/uniq.go b/pkg/xstrings/uniq.go new file mode 100644 index 0000000..055f88c --- /dev/null +++ b/pkg/xstrings/uniq.go @@ -0,0 +1,15 @@ +package xstrings + +type Comparable interface{ ~int | ~int64 | 
~string } + +func UniqueSlice[T Comparable](s []T) []T { + keys := make(map[T]bool) + list := []T{} + for _, entry := range s { + if _, value := keys[entry]; !value { + keys[entry] = true + list = append(list, entry) + } + } + return list +} diff --git a/pkg/xstrings/xstrings_suite_test.go b/pkg/xstrings/xstrings_suite_test.go new file mode 100644 index 0000000..6990407 --- /dev/null +++ b/pkg/xstrings/xstrings_suite_test.go @@ -0,0 +1,13 @@ +package xstrings_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestXStrings(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "XStrings test suite") +} diff --git a/services/connectors/slack.go b/services/connectors/slack.go index 7d14ad2..fd0f436 100644 --- a/services/connectors/slack.go +++ b/services/connectors/slack.go @@ -7,12 +7,12 @@ import ( "io/ioutil" "log" "os" - "regexp" "strings" "sync" "time" "github.com/mudler/LocalAgent/pkg/xlog" + "github.com/mudler/LocalAgent/pkg/xstrings" "github.com/mudler/LocalAgent/services/actions" "github.com/sashabaranov/go-openai" @@ -137,23 +137,11 @@ func replaceUserIDsWithNamesInMessage(api *slack.Client, message string) string return message } -func uniqueStringSlice(s []string) []string { - keys := make(map[string]bool) - list := []string{} - for _, entry := range s { - if _, value := keys[entry]; !value { - keys[entry] = true - list = append(list, entry) - } - } - return list -} - func generateAttachmentsFromJobResponse(j *types.JobResult) (attachments []slack.Attachment) { for _, state := range j.State { // coming from the search action if urls, exists := state.Metadata[actions.MetadataUrls]; exists { - for _, url := range uniqueStringSlice(urls.([]string)) { + for _, url := range xstrings.UniqueSlice(urls.([]string)) { attachment := slack.Attachment{ Title: "URL", TitleLink: url, @@ -165,7 +153,7 @@ func generateAttachmentsFromJobResponse(j *types.JobResult) (attachments []slack // coming from the gen image 
actions if imagesUrls, exists := state.Metadata[actions.MetadataImages]; exists { - for _, url := range uniqueStringSlice(imagesUrls.([]string)) { + for _, url := range xstrings.UniqueSlice(imagesUrls.([]string)) { attachment := slack.Attachment{ Title: "Image", TitleLink: url, @@ -312,45 +300,11 @@ func encodeImageFromURL(imageBytes bytes.Buffer) (string, error) { return base64Image, nil } -// SplitText splits a long text into chunks of a specified maximum length without truncating words and preserves special characters. -func splitText(text string, maxLen int) []string { - if len(text) <= maxLen { - return []string{text} - } - - var chunks []string - lines := strings.Split(text, "\n") // Split text by newlines first - whitespaceRegex := regexp.MustCompile(`\s+`) - - for _, line := range lines { - var chunk string - words := whitespaceRegex.Split(line, -1) // Splitting the line into words while preserving whitespace - - for _, word := range words { - if len(chunk)+len(word)+1 > maxLen { // +1 for space - chunks = append(chunks, chunk) - chunk = word - } else { - if chunk != "" { - chunk += " " - } - chunk += word - } - } - - if chunk != "" { - chunks = append(chunks, chunk) - } - } - - return chunks -} - func replyWithPostMessage(finalResponse string, api *slack.Client, ev *slackevents.MessageEvent, postMessageParams slack.PostMessageParameters, res *types.JobResult) { if len(finalResponse) > 4000 { // split response in multiple messages, and update the first - messages := splitText(finalResponse, 4000) + messages := xstrings.SplitParagraph(finalResponse, 3000) for i, message := range messages { if i == 0 { @@ -386,7 +340,7 @@ func replyToUpdateMessage(finalResponse string, api *slack.Client, ev *slackeven if len(finalResponse) > 3000 { // split response in multiple messages, and update the first - messages := splitText(finalResponse, 3000) + messages := xstrings.SplitParagraph(finalResponse, 3000) _, _, _, err := api.UpdateMessage( ev.Channel, diff --git 
a/services/connectors/telegram.go b/services/connectors/telegram.go index 867e822..66e0e20 100644 --- a/services/connectors/telegram.go +++ b/services/connectors/telegram.go @@ -14,6 +14,7 @@ import ( "github.com/mudler/LocalAgent/core/agent" "github.com/mudler/LocalAgent/core/types" "github.com/mudler/LocalAgent/pkg/xlog" + "github.com/mudler/LocalAgent/pkg/xstrings" "github.com/mudler/LocalAgent/services/actions" "github.com/sashabaranov/go-openai" ) @@ -97,7 +98,7 @@ func (t *Telegram) handleUpdate(ctx context.Context, b *bot.Bot, a *agent.Agent, // coming from the gen image actions if imagesUrls, exists := res.Metadata[actions.MetadataImages]; exists { - for _, url := range uniqueStringSlice(imagesUrls.([]string)) { + for _, url := range xstrings.UniqueSlice(imagesUrls.([]string)) { b.SendPhoto(ctx, &bot.SendPhotoParams{ ChatID: update.Message.Chat.ID, Photo: models.InputFileString{