Skip to content

Commit

Permalink
Add skip archive support (#2257)
Browse files Browse the repository at this point in the history
  • Loading branch information
dustin-decker committed Dec 22, 2023
1 parent f699f60 commit 7d93adc
Show file tree
Hide file tree
Showing 10 changed files with 591 additions and 457 deletions.
1 change: 1 addition & 0 deletions hack/snifftest/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ func main() {
}
},
true,
false,
)

logger.Info("scanning repo", "repo", r)
Expand Down
5 changes: 5 additions & 0 deletions pkg/handlers/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ type Archive struct {
size int
currentDepth int
skipBinaries bool
skipArchives bool
}

// New creates a new Archive handler with the provided options.
Expand All @@ -72,6 +73,10 @@ func SetArchiveMaxTimeout(timeout time.Duration) {

// FromFile extracts the files from an archive.
func (a *Archive) FromFile(originalCtx logContext.Context, data io.Reader) chan []byte {
if a.skipArchives {
return nil
}

archiveChan := make(chan []byte, defaultBufferSize)
go func() {
ctx, cancel := logContext.WithTimeout(originalCtx, maxTimeout)
Expand Down
25 changes: 25 additions & 0 deletions pkg/handlers/archive_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,31 @@ func TestExtractDebContent(t *testing.T) {
assert.Equal(t, expectedLength, len(string(content)))
}

// TestSkipArchive checks that HandleFile, when given WithSkipArchives(true),
// reports the archive as not handled and emits no chunks on the reporter
// channel.
func TestSkipArchive(t *testing.T) {
	f, err := os.Open("testdata/test.tgz")
	assert.Nil(t, err)
	defer f.Close()

	rdr, err := diskbufferreader.New(f)
	assert.NoError(t, err)

	chunks := make(chan *sources.Chunk)
	go func() {
		// Closing the channel is what lets the draining loop below finish.
		defer close(chunks)
		handled := HandleFile(
			logContext.Background(),
			rdr,
			&sources.Chunk{},
			sources.ChanReporter{Ch: chunks},
			WithSkipArchives(true),
		)
		assert.False(t, handled)
	}()

	// Drain everything the handler reports; a skipped archive should yield nothing.
	var got int
	for range chunks {
		got++
	}
	assert.Equal(t, 0, got)
}

func TestExtractTarContent(t *testing.T) {
file, err := os.Open("testdata/test.tgz")
assert.Nil(t, err)
Expand Down
13 changes: 13 additions & 0 deletions pkg/handlers/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,15 @@ func WithSkipBinaries(skip bool) Option {
}
}

// WithSkipArchives returns an Option that sets the skipArchives flag on an
// *Archive handler to skip. Handlers of any other concrete type are left
// untouched.
func WithSkipArchives(skip bool) Option {
	return func(h Handler) {
		archive, isArchive := h.(*Archive)
		if !isArchive {
			return
		}
		archive.skipArchives = skip
	}
}

type Handler interface {
FromFile(logContext.Context, io.Reader) chan []byte
IsFiletype(logContext.Context, io.Reader) (io.Reader, bool)
Expand Down Expand Up @@ -84,6 +93,10 @@ func processHandler(ctx logContext.Context, h Handler, reReader *diskbufferreade
}

func handleChunks(ctx logContext.Context, handlerChan chan []byte, chunkSkel *sources.Chunk, reporter sources.ChunkReporter) bool {
if handlerChan == nil {
return false
}

for {
select {
case data, open := <-handlerChan:
Expand Down
976 changes: 519 additions & 457 deletions pkg/pb/sourcespb/sources.pb.go

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions pkg/pb/sourcespb/sources.pb.validate.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions pkg/sources/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ type Git struct {
metrics metrics
concurrency *semaphore.Weighted
skipBinaries bool
skipArchives bool
}

type metrics struct {
Expand All @@ -66,6 +67,7 @@ type metrics struct {

func NewGit(sourceType sourcespb.SourceType, jobID sources.JobID, sourceID sources.SourceID, sourceName string, verify bool, concurrency int,
sourceMetadataFunc func(file, email, commit, timestamp, repository string, line int64) *source_metadatapb.MetaData, skipBinaries bool,
skipArchives bool,
) *Git {
return &Git{
sourceType: sourceType,
Expand All @@ -76,6 +78,7 @@ func NewGit(sourceType sourcespb.SourceType, jobID sources.JobID, sourceID sourc
verify: verify,
concurrency: semaphore.NewWeighted(int64(concurrency)),
skipBinaries: skipBinaries,
skipArchives: skipArchives,
}
}

Expand Down Expand Up @@ -178,6 +181,7 @@ func (s *Source) Init(aCtx context.Context, name string, jobId sources.JobID, so
}
},
conn.GetSkipBinaries(),
conn.GetSkipArchives(),
)
return nil
}
Expand Down Expand Up @@ -1014,6 +1018,10 @@ func (s *Git) handleBinary(ctx context.Context, gitDir string, reporter sources.
}
}

if s.skipArchives {
handlerOpts = append(handlerOpts, handlers.WithSkipArchives(true))
}

cmd := exec.Command("git", "-C", gitDir, "cat-file", "blob", commitHash.String()+":"+path)

var stderr bytes.Buffer
Expand Down
1 change: 1 addition & 0 deletions pkg/sources/github/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ func (s *Source) Init(aCtx context.Context, name string, jobID sources.JobID, so
}
},
conn.GetSkipBinaries(),
conn.GetSkipArchives(),
)

return nil
Expand Down
1 change: 1 addition & 0 deletions pkg/sources/gitlab/gitlab.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ func (s *Source) Init(_ context.Context, name string, jobId sources.JobID, sourc
}
},
conn.GetSkipBinaries(),
conn.GetSkipArchives(),
)

return nil
Expand Down
6 changes: 6 additions & 0 deletions proto/sources.proto
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ message Bitbucket {
repeated string repositories = 5;
repeated string ignore_repos = 6;
bool skip_binaries = 7;
bool skip_archives = 8;
}

message CircleCI {
Expand Down Expand Up @@ -196,6 +197,7 @@ message Git {
// like head, base, bare, etc.
string uri = 13; // repository URL. https://, file://, or ssh://
bool skip_binaries = 14;
bool skip_archives = 15;
}

message GitLab {
Expand All @@ -208,6 +210,7 @@ message GitLab {
repeated string repositories = 5;
repeated string ignore_repos = 6;
bool skip_binaries = 7;
bool skip_archives = 8;
}

message GitHub {
Expand All @@ -230,6 +233,7 @@ message GitHub {
bool include_issue_comments = 15;
bool include_gist_comments = 16;
bool skip_binaries = 17;
bool skip_archives = 18;
}

message GoogleDrive {
Expand Down Expand Up @@ -301,6 +305,7 @@ message Gerrit {
}
repeated string projects = 4;
bool skip_binaries = 5;
bool skip_archives = 6;
}

message Jenkins {
Expand Down Expand Up @@ -369,4 +374,5 @@ message AzureRepos {
repeated string include_projects = 10;
repeated string ignore_projects = 11;
bool skip_binaries = 12;
bool skip_archives = 13;
}

0 comments on commit 7d93adc

Please sign in to comment.