fix(github): skip binary files
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
@@ -3,11 +3,13 @@ package actions
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/google/go-github/v69/github"
|
"github.com/google/go-github/v69/github"
|
||||||
"github.com/mudler/LocalAGI/core/types"
|
"github.com/mudler/LocalAGI/core/types"
|
||||||
"github.com/mudler/LocalAGI/pkg/config"
|
"github.com/mudler/LocalAGI/pkg/config"
|
||||||
|
"github.com/mudler/LocalAGI/pkg/xlog"
|
||||||
"github.com/sashabaranov/go-openai/jsonschema"
|
"github.com/sashabaranov/go-openai/jsonschema"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -28,6 +30,30 @@ func NewGithubRepositoryGetAllContent(config map[string]string) *GithubRepositor
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isTextFile checks if a file is likely to be a text file based on its extension
|
||||||
|
func isTextFile(path string) bool {
|
||||||
|
// List of common text/code file extensions
|
||||||
|
textExtensions := map[string]bool{
|
||||||
|
".txt": true, ".md": true, ".go": true, ".py": true, ".js": true,
|
||||||
|
".ts": true, ".jsx": true, ".tsx": true, ".html": true, ".css": true,
|
||||||
|
".json": true, ".yaml": true, ".yml": true, ".xml": true, ".sql": true,
|
||||||
|
".sh": true, ".bash": true, ".zsh": true, ".rb": true, ".php": true,
|
||||||
|
".java": true, ".c": true, ".cpp": true, ".h": true, ".hpp": true,
|
||||||
|
".rs": true, ".swift": true, ".kt": true, ".scala": true, ".lua": true,
|
||||||
|
".pl": true, ".r": true, ".m": true, ".mm": true, ".f": true,
|
||||||
|
".f90": true, ".f95": true, ".f03": true, ".f08": true, ".f15": true,
|
||||||
|
".hs": true, ".lhs": true, ".erl": true, ".hrl": true, ".ex": true,
|
||||||
|
".exs": true, ".eex": true, ".leex": true, ".heex": true, ".config": true,
|
||||||
|
".toml": true, ".ini": true, ".conf": true, ".env": true, ".gitignore": true,
|
||||||
|
".dockerignore": true, ".editorconfig": true, ".prettierrc": true, ".eslintrc": true,
|
||||||
|
".babelrc": true, ".npmrc": true, ".yarnrc": true, ".lock": true,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the file extension
|
||||||
|
ext := strings.ToLower(filepath.Ext(path))
|
||||||
|
return textExtensions[ext]
|
||||||
|
}
|
||||||
|
|
||||||
func (g *GithubRepositoryGetAllContent) getContentRecursively(ctx context.Context, path string, owner string, repository string) (string, error) {
|
func (g *GithubRepositoryGetAllContent) getContentRecursively(ctx context.Context, path string, owner string, repository string) (string, error) {
|
||||||
var result strings.Builder
|
var result strings.Builder
|
||||||
|
|
||||||
@@ -47,6 +73,13 @@ func (g *GithubRepositoryGetAllContent) getContentRecursively(ctx context.Contex
|
|||||||
}
|
}
|
||||||
result.WriteString(subContent)
|
result.WriteString(subContent)
|
||||||
} else if item.GetType() == "file" {
|
} else if item.GetType() == "file" {
|
||||||
|
// Skip binary/image files
|
||||||
|
if !isTextFile(item.GetPath()) {
|
||||||
|
xlog.Warn("Skipping non-text file: ", "file", item.GetPath())
|
||||||
|
result.WriteString(fmt.Sprintf("Skipping non-text file: %s\n", item.GetPath()))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
// Get file content
|
// Get file content
|
||||||
fileContent, _, _, err := g.client.Repositories.GetContents(ctx, owner, repository, item.GetPath(), nil)
|
fileContent, _, _, err := g.client.Repositories.GetContents(ctx, owner, repository, item.GetPath(), nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
Reference in New Issue
Block a user