From 5e5224da25a201f82d2286d2908d279246208dc5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 2 May 2025 22:34:37 +0200 Subject: [PATCH] fix(github): skip binary files Signed-off-by: Ettore Di Giacinto --- .../actions/githubrepositorygetallcontent.go | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/services/actions/githubrepositorygetallcontent.go b/services/actions/githubrepositorygetallcontent.go index 0dd386e..ad1ee9c 100644 --- a/services/actions/githubrepositorygetallcontent.go +++ b/services/actions/githubrepositorygetallcontent.go @@ -3,11 +3,13 @@ package actions import ( "context" "fmt" + "path/filepath" "strings" "github.com/google/go-github/v69/github" "github.com/mudler/LocalAGI/core/types" "github.com/mudler/LocalAGI/pkg/config" + "github.com/mudler/LocalAGI/pkg/xlog" "github.com/sashabaranov/go-openai/jsonschema" ) @@ -28,6 +30,30 @@ func NewGithubRepositoryGetAllContent(config map[string]string) *GithubRepositor } } +// isTextFile checks if a file is likely to be a text file based on its extension +func isTextFile(path string) bool { + // List of common text/code file extensions + textExtensions := map[string]bool{ + ".txt": true, ".md": true, ".go": true, ".py": true, ".js": true, + ".ts": true, ".jsx": true, ".tsx": true, ".html": true, ".css": true, + ".json": true, ".yaml": true, ".yml": true, ".xml": true, ".sql": true, + ".sh": true, ".bash": true, ".zsh": true, ".rb": true, ".php": true, + ".java": true, ".c": true, ".cpp": true, ".h": true, ".hpp": true, + ".rs": true, ".swift": true, ".kt": true, ".scala": true, ".lua": true, + ".pl": true, ".r": true, ".m": true, ".mm": true, ".f": true, + ".f90": true, ".f95": true, ".f03": true, ".f08": true, ".f15": true, + ".hs": true, ".lhs": true, ".erl": true, ".hrl": true, ".ex": true, + ".exs": true, ".eex": true, ".leex": true, ".heex": true, ".config": true, + ".toml": true, ".ini": true, ".conf": true, ".env": true, ".gitignore": true, + ".dockerignore": true, ".editorconfig": true, ".prettierrc": true, ".eslintrc": true, + ".babelrc": true, ".npmrc": true, ".yarnrc": true, ".lock": true, + } + + // Get the file extension + ext := strings.ToLower(filepath.Ext(path)) + return textExtensions[ext] +} + func (g *GithubRepositoryGetAllContent) getContentRecursively(ctx context.Context, path string, owner string, repository string) (string, error) { var result strings.Builder @@ -47,6 +73,13 @@ func (g *GithubRepositoryGetAllContent) getContentRecursively(ctx context.Contex } result.WriteString(subContent) } else if item.GetType() == "file" { + // Skip binary/image files + if !isTextFile(item.GetPath()) { + xlog.Warn("Skipping non-text file: ", "file", item.GetPath()) + result.WriteString(fmt.Sprintf("Skipping non-text file: %s\n", item.GetPath())) + continue + } + // Get file content fileContent, _, _, err := g.client.Repositories.GetContents(ctx, owner, repository, item.GetPath(), nil) if err != nil {