fix(github): skip binary files

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2025-05-02 22:34:37 +02:00
parent c529f880d3
commit 5e5224da25

View File

@@ -3,11 +3,13 @@ package actions
import ( import (
"context" "context"
"fmt" "fmt"
"path/filepath"
"strings" "strings"
"github.com/google/go-github/v69/github" "github.com/google/go-github/v69/github"
"github.com/mudler/LocalAGI/core/types" "github.com/mudler/LocalAGI/core/types"
"github.com/mudler/LocalAGI/pkg/config" "github.com/mudler/LocalAGI/pkg/config"
"github.com/mudler/LocalAGI/pkg/xlog"
"github.com/sashabaranov/go-openai/jsonschema" "github.com/sashabaranov/go-openai/jsonschema"
) )
@@ -28,6 +30,30 @@ func NewGithubRepositoryGetAllContent(config map[string]string) *GithubRepositor
} }
} }
// isTextFile checks if a file is likely to be a text file based on its extension
func isTextFile(path string) bool {
// List of common text/code file extensions
textExtensions := map[string]bool{
".txt": true, ".md": true, ".go": true, ".py": true, ".js": true,
".ts": true, ".jsx": true, ".tsx": true, ".html": true, ".css": true,
".json": true, ".yaml": true, ".yml": true, ".xml": true, ".sql": true,
".sh": true, ".bash": true, ".zsh": true, ".rb": true, ".php": true,
".java": true, ".c": true, ".cpp": true, ".h": true, ".hpp": true,
".rs": true, ".swift": true, ".kt": true, ".scala": true, ".lua": true,
".pl": true, ".r": true, ".m": true, ".mm": true, ".f": true,
".f90": true, ".f95": true, ".f03": true, ".f08": true, ".f15": true,
".hs": true, ".lhs": true, ".erl": true, ".hrl": true, ".ex": true,
".exs": true, ".eex": true, ".leex": true, ".heex": true, ".config": true,
".toml": true, ".ini": true, ".conf": true, ".env": true, ".gitignore": true,
".dockerignore": true, ".editorconfig": true, ".prettierrc": true, ".eslintrc": true,
".babelrc": true, ".npmrc": true, ".yarnrc": true, ".lock": true,
}
// Get the file extension
ext := strings.ToLower(filepath.Ext(path))
return textExtensions[ext]
}
func (g *GithubRepositoryGetAllContent) getContentRecursively(ctx context.Context, path string, owner string, repository string) (string, error) { func (g *GithubRepositoryGetAllContent) getContentRecursively(ctx context.Context, path string, owner string, repository string) (string, error) {
var result strings.Builder var result strings.Builder
@@ -47,6 +73,13 @@ func (g *GithubRepositoryGetAllContent) getContentRecursively(ctx context.Contex
} }
result.WriteString(subContent) result.WriteString(subContent)
} else if item.GetType() == "file" { } else if item.GetType() == "file" {
// Skip binary/image files
if !isTextFile(item.GetPath()) {
xlog.Warn("Skipping non-text file: ", "file", item.GetPath())
result.WriteString(fmt.Sprintf("Skipping non-text file: %s\n", item.GetPath()))
continue
}
// Get file content // Get file content
fileContent, _, _, err := g.client.Repositories.GetContents(ctx, owner, repository, item.GetPath(), nil) fileContent, _, _, err := g.client.Repositories.GetContents(ctx, owner, repository, item.GetPath(), nil)
if err != nil { if err != nil {