Skip to content

Commit

Permalink
Merge branch 'apache:main' into test-daily-risk
Browse files Browse the repository at this point in the history
  • Loading branch information
Startrekzky committed Jul 12, 2024
2 parents dde616c + 963d42e commit 3231b38
Show file tree
Hide file tree
Showing 20 changed files with 706 additions and 251 deletions.
5 changes: 4 additions & 1 deletion backend/core/models/domainlayer/code/pull_request.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ package code

import (
"fmt"
"github.com/apache/incubator-devlake/core/models/domainlayer/ticket"
"time"

"github.com/apache/incubator-devlake/core/models/domainlayer/ticket"

"github.com/apache/incubator-devlake/core/models/domainlayer"
)

Expand All @@ -43,6 +44,8 @@ type PullRequest struct {
AuthorName string `gorm:"type:varchar(100)"`
//User domainUser.User `gorm:"foreignKey:AuthorId"`
AuthorId string `gorm:"type:varchar(100)"`
MergedByName string `gorm:"type:varchar(100)"`
MergedById string `gorm:"type:varchar(100)"`
ParentPrId string `gorm:"index;type:varchar(100)"`
PullRequestKey int
CreatedDate time.Time
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package migrationscripts

import (
"github.com/apache/incubator-devlake/core/context"
"github.com/apache/incubator-devlake/core/errors"
"github.com/apache/incubator-devlake/core/plugin"
)

// Compile-time assertion that *addMergedByToPr implements plugin.MigrationScript.
var _ plugin.MigrationScript = (*addMergedByToPr)(nil)

// pr20240710 declares the two columns, MergedByName and MergedById, that the
// addMergedByToPr migration passes to AutoMigrate. Both are varchar(100).
type pr20240710 struct {
	MergedByName string `gorm:"type:varchar(100)"`
	MergedById   string `gorm:"type:varchar(100)"`
}

// TableName binds pr20240710 to the "pull_requests" table.
func (pr20240710) TableName() string {
	const table = "pull_requests"
	return table
}

// addMergedByToPr is the migration script named "add merged by to pull_requests";
// it carries no state of its own.
type addMergedByToPr struct{}

// Up runs AutoMigrate for pr20240710 against the DAL obtained from basicRes
// and returns the resulting error, which is nil on success.
func (*addMergedByToPr) Up(basicRes context.BasicRes) errors.Error {
	return basicRes.GetDal().AutoMigrate(&pr20240710{})
}

// Version returns the numeric version of this migration script.
func (*addMergedByToPr) Version() uint64 {
	const version uint64 = 20240710142101
	return version
}

// Name returns the human-readable description of this migration script.
func (*addMergedByToPr) Name() string {
	const name = "add merged by to pull_requests"
	return name
}
1 change: 1 addition & 0 deletions backend/core/models/migrationscripts/register.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,5 +125,6 @@ func All() []plugin.MigrationScript {
new(addPullRequestIdIndexToPullRequestComments),
new(initIncidentRelatedTables),
new(renameProjectIssueMetrics),
new(addMergedByToPr),
}
}
50 changes: 24 additions & 26 deletions backend/plugins/gitextractor/parser/clone_gitcli.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,25 +89,19 @@ func (g *GitcliCloner) CloneRepo(ctx plugin.SubTaskContext, localDir string) err

}

cmd, err := g.buildCloneCommand(ctx, localDir, since)
if err != nil {
return err
}
err = g.execCloneCommand(cmd)
err := g.execGitCloneCommand(ctx, localDir, since)
if err != nil {
return err
}
// deepen the commits by 1 more step to avoid https://github.com/apache/incubator-devlake/issues/7426
if since != nil {
// fixes error described on https://stackoverflow.com/questions/63878612/git-fatal-error-in-object-unshallow-sha-1
// It might be caused by the commit being deepened having multiple parents (e.g. a merge commit), not sure.
repackCmd := exec.CommandContext(ctx.GetContext(), "git", "-C", localDir, "repack", "-d")
if err := repackCmd.Run(); err != nil {
if err := g.execGitCommand(ctx, "-C", localDir, "repack", "-d"); err != nil {
return errors.Default.Wrap(err, "failed to repack the repo")
}
deepenCmd := exec.CommandContext(ctx.GetContext(), "git", "-C", localDir, "fetch", "--deepen=1")
// deepen would fail on a EMPTY repo, ignore the error
if err := deepenCmd.Run(); err != nil {
if err := g.execGitCommand(ctx, "-C", localDir, "fetch", "--deepen=1"); err != nil {
g.logger.Error(err, "failed to deepen the cloned repo")
}
}
Expand All @@ -119,9 +113,22 @@ func (g *GitcliCloner) CloneRepo(ctx plugin.SubTaskContext, localDir string) err
return nil
}

func (g *GitcliCloner) buildCloneCommand(ctx plugin.SubTaskContext, localDir string, since *time.Time) (*exec.Cmd, errors.Error) {
// execGitCloneCommand builds the argument list for a bare `git clone` of the
// repository URL found in the task options and runs it via execGitCommand.
//
// Parameters:
//   - ctx: subtask context; its data must be a *GitExtractorTaskData, otherwise
//     the type assertion panics.
//   - localDir: destination directory handed to `git clone`.
//   - since: when non-nil, the clone is limited with --shallow-since, formatted
//     as RFC 3339.
//
// Options.SkipCommitStat is dereferenced unconditionally, so it must be non-nil.
// The return value is whatever execGitCommand returns.
func (g *GitcliCloner) execGitCloneCommand(ctx plugin.SubTaskContext, localDir string, since *time.Time) errors.Error {
	taskData := ctx.GetData().(*GitExtractorTaskData)
	cloneArgs := []string{"clone", taskData.Options.Url, localDir, "--bare", "--progress"}
	// support time after and diff sync
	if since != nil {
		shallowSince := fmt.Sprintf("--shallow-since=%s", since.Format(time.RFC3339))
		cloneArgs = append(cloneArgs, shallowSince)
	}
	// support skipping blobs collection
	if skipStat := *taskData.Options.SkipCommitStat; skipStat {
		cloneArgs = append(cloneArgs, "--filter=blob:none")
	}
	return g.execGitCommand(ctx, cloneArgs...)
}

func (g *GitcliCloner) execGitCommand(ctx plugin.SubTaskContext, args ...string) errors.Error {
taskData := ctx.GetData().(*GitExtractorTaskData)
env := []string{}
// support proxy
if taskData.ParsedURL.Scheme == "http" || taskData.ParsedURL.Scheme == "https" {
Expand All @@ -136,7 +143,7 @@ func (g *GitcliCloner) buildCloneCommand(ctx plugin.SubTaskContext, localDir str
if taskData.Options.Proxy != "" {
parsedProxyURL, e := url.Parse(taskData.Options.Proxy)
if e != nil {
return nil, errors.BadInput.Wrap(e, "failed to parse the proxy URL")
return errors.BadInput.Wrap(e, "failed to parse the proxy URL")
}
proxyCommand := "corkscrew"
sshCmdArgs = append(sshCmdArgs, "-o", fmt.Sprintf(`ProxyCommand="%s %s %s %%h %%p"`, proxyCommand, parsedProxyURL.Hostname(), parsedProxyURL.Port()))
Expand All @@ -146,16 +153,16 @@ func (g *GitcliCloner) buildCloneCommand(ctx plugin.SubTaskContext, localDir str
pkFile, err := os.CreateTemp("", "gitext-pk")
if err != nil {
g.logger.Error(err, "create temp private key file error")
return nil, errors.Default.New("failed to handle the private key")
return errors.Default.New("failed to handle the private key")
}
if _, e := pkFile.WriteString(taskData.Options.PrivateKey + "\n"); e != nil {
g.logger.Error(err, "write private key file error")
return nil, errors.Default.New("failed to write the private key")
return errors.Default.New("failed to write the private key")
}
pkFile.Close()
if e := os.Chmod(pkFile.Name(), 0600); e != nil {
g.logger.Error(err, "chmod private key file error")
return nil, errors.Default.New("failed to modify the private key")
return errors.Default.New("failed to modify the private key")
}

if taskData.Options.Passphrase != "" {
Expand All @@ -169,7 +176,7 @@ func (g *GitcliCloner) buildCloneCommand(ctx plugin.SubTaskContext, localDir str
if ppout, pperr := pp.CombinedOutput(); pperr != nil {
g.logger.Error(pperr, "change private key passphrase error")
g.logger.Info(string(ppout))
return nil, errors.Default.New("failed to decrypt the private key")
return errors.Default.New("failed to decrypt the private key")
}
}
defer os.Remove(pkFile.Name())
Expand All @@ -179,22 +186,13 @@ func (g *GitcliCloner) buildCloneCommand(ctx plugin.SubTaskContext, localDir str
env = append(env, fmt.Sprintf("GIT_SSH_COMMAND=ssh %s", strings.Join(sshCmdArgs, " ")))
}
}
// support time after and diff sync
if since != nil {
args = append(args, fmt.Sprintf("--shallow-since=%s", since.Format(time.RFC3339)))
}
// support skipping blobs collection
if *taskData.Options.SkipCommitStat {
args = append(args, "--filter=blob:none")
}
// fmt.Printf("args: %v\n", args)
g.logger.Debug("git %v", args)
cmd := exec.CommandContext(ctx.GetContext(), "git", args...)
cmd.Env = env
return cmd, nil
return g.execCommand(cmd)
}

func (g *GitcliCloner) execCloneCommand(cmd *exec.Cmd) errors.Error {
func (g *GitcliCloner) execCommand(cmd *exec.Cmd) errors.Error {
stdout, err := cmd.StdoutPipe()
if err != nil {
g.logger.Error(err, "stdout pipe error")
Expand Down
14 changes: 14 additions & 0 deletions backend/plugins/gitextractor/parser/repo_gogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,20 @@ func (r *GogitRepoCollector) CollectCommits(subtaskCtx plugin.SubTaskContext) (e
default:
}
commitSha := commit.Hash.String()

if commit.NumParents() != 0 {
_, err := commit.Parents().Next()
if err != nil {
if err == plumbing.ErrObjectNotFound {
// Skip calculating commit statistics when there are parent commits, but the first one cannot be fetched from the ODB.
// This usually happens during a shallow clone for incremental collection. Otherwise, we might end up overwriting
// the correct addition/deletion data in the database with an absurdly large addition number.
r.logger.Info("skip commit %s because it has no parent commit", commitSha)
return nil
}
return err
}
}
codeCommit := &code.Commit{
Sha: commitSha,
Message: commit.Message,
Expand Down
15 changes: 11 additions & 4 deletions backend/plugins/gitextractor/parser/repo_libgit2.go
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,17 @@ func (r *Libgit2RepoCollector) CollectCommits(subtaskCtx plugin.SubTaskContext)
if commit == nil {
return nil
}
var parent *git.Commit
if commit.ParentCount() > 0 {
parent = commit.Parent(0)
// Skip calculating commit statistics when there are parent commits, but the first one cannot be fetched from the ODB.
// This usually happens during a shallow clone for incremental collection. Otherwise, we might end up overwriting
// the correct addition/deletion data in the database with an absurdly large addition number.
if parent == nil {
r.logger.Info("skip commit %s because it has no parent commit", commit.Id().String())
return nil
}
}
commitSha := commit.Id().String()
r.logger.Debug("process commit: %s", commitSha)
c := &code.Commit{
Expand All @@ -303,10 +314,6 @@ func (r *Libgit2RepoCollector) CollectCommits(subtaskCtx plugin.SubTaskContext)
if err != nil {
return err
}
var parent *git.Commit
if commit.ParentCount() > 0 {
parent = commit.Parent(0)
}

if !*taskOpts.SkipCommitStat {
var stats *git.DiffStats
Expand Down
5 changes: 3 additions & 2 deletions backend/plugins/github/e2e/pr_review_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,10 @@ func TestPrReviewDataFlow(t *testing.T) {
"./snapshot_tables/_tool_github_reviewers.csv",
[]string{
"connection_id",
"github_id",
"reviewer_id",
"pull_request_id",
"login",
"username",
"name",
"_raw_data_params",
"_raw_data_table",
"_raw_data_id",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
connection_id,github_id,pull_request_id,login,_raw_data_params,_raw_data_table,_raw_data_id,_raw_data_remark
1,2813260,325179595,KevinBaiSg,"{""ConnectionId"":1,""Name"":""panjf2000/ants""}",_raw_github_api_pull_request_reviews,39,
1,7496278,308859272,panjf2000,"{""ConnectionId"":1,""Name"":""panjf2000/ants""}",_raw_github_api_pull_request_reviews,21,
1,7496278,316337433,panjf2000,"{""ConnectionId"":1,""Name"":""panjf2000/ants""}",_raw_github_api_pull_request_reviews,22,
1,7496278,325179595,panjf2000,"{""ConnectionId"":1,""Name"":""panjf2000/ants""}",_raw_github_api_pull_request_reviews,38,
1,8923413,308859272,choleraehyq,"{""ConnectionId"":1,""Name"":""panjf2000/ants""}",_raw_github_api_pull_request_reviews,20,
1,8923413,316337433,choleraehyq,"{""ConnectionId"":1,""Name"":""panjf2000/ants""}",_raw_github_api_pull_request_reviews,24,
connection_id,reviewer_id,pull_request_id,username,name,_raw_data_params,_raw_data_table,_raw_data_id,_raw_data_remark
1,2813260,325179595,KevinBaiSg,,"{""ConnectionId"":1,""Name"":""panjf2000/ants""}",_raw_github_api_pull_request_reviews,39,
1,7496278,308859272,panjf2000,,"{""ConnectionId"":1,""Name"":""panjf2000/ants""}",_raw_github_api_pull_request_reviews,21,
1,7496278,316337433,panjf2000,,"{""ConnectionId"":1,""Name"":""panjf2000/ants""}",_raw_github_api_pull_request_reviews,22,
1,7496278,325179595,panjf2000,,"{""ConnectionId"":1,""Name"":""panjf2000/ants""}",_raw_github_api_pull_request_reviews,38,
1,8923413,308859272,choleraehyq,,"{""ConnectionId"":1,""Name"":""panjf2000/ants""}",_raw_github_api_pull_request_reviews,20,
1,8923413,316337433,choleraehyq,,"{""ConnectionId"":1,""Name"":""panjf2000/ants""}",_raw_github_api_pull_request_reviews,24,
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package migrationscripts

import (
"github.com/apache/incubator-devlake/core/context"
"github.com/apache/incubator-devlake/core/errors"
"github.com/apache/incubator-devlake/core/plugin"
)

// Compile-time assertion that *addMergedByToPr implements plugin.MigrationScript.
var _ plugin.MigrationScript = (*addMergedByToPr)(nil)

// pr20240710 declares the two columns that the addMergedByToPr migration
// passes to AutoMigrate: MergedByName (varchar(100)) and MergedById (int).
type pr20240710 struct {
	MergedByName string `gorm:"type:varchar(100)"`
	MergedById   int
}

// TableName binds pr20240710 to the "_tool_github_pull_requests" table.
func (pr20240710) TableName() string {
	const table = "_tool_github_pull_requests"
	return table
}

// addMergedByToPr is the migration script named
// "add merged by to _tool_github_pull_requests"; it carries no state of its own.
type addMergedByToPr struct{}

// Up runs AutoMigrate for pr20240710 against the DAL obtained from basicRes
// and returns the resulting error, which is nil on success.
func (*addMergedByToPr) Up(basicRes context.BasicRes) errors.Error {
	return basicRes.GetDal().AutoMigrate(&pr20240710{})
}

// Version returns the numeric version of this migration script.
func (*addMergedByToPr) Version() uint64 {
	const version uint64 = 20240710142100
	return version
}

// Name returns the human-readable description of this migration script.
func (*addMergedByToPr) Name() string {
	const name = "add merged by to _tool_github_pull_requests"
	return name
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package migrationscripts

import (
"github.com/apache/incubator-devlake/core/context"
"github.com/apache/incubator-devlake/core/errors"
coreArchived "github.com/apache/incubator-devlake/core/models/migrationscripts/archived"
"github.com/apache/incubator-devlake/core/plugin"
"github.com/apache/incubator-devlake/plugins/github/models/migrationscripts/archived"
)

// Compile-time assertion that *restructReviewer implements plugin.MigrationScript.
var _ plugin.MigrationScript = (*restructReviewer)(nil)

// reviewer20240711 is the schema that restructReviewer.Up passes to AutoMigrate
// after dropping the archived GithubReviewer table.
type reviewer20240711 struct {
// ConnectionId, ReviewerId and PullRequestId together form the composite primary key.
ConnectionId uint64 `gorm:"primaryKey"`
ReviewerId int `gorm:"primaryKey"`
PullRequestId int `gorm:"primaryKey"`
// The remaining columns are all varchar(255).
Name string `gorm:"type:varchar(255)"`
Username string `gorm:"type:varchar(255)"`
State string `gorm:"type:varchar(255)"`
AvatarUrl string `gorm:"type:varchar(255)"`
WebUrl string `gorm:"type:varchar(255)"`
// Embedded model from the core archived migration package.
coreArchived.NoPKModel
}

// TableName binds reviewer20240711 to the "_tool_github_reviewers" table.
func (reviewer20240711) TableName() string {
	const table = "_tool_github_reviewers"
	return table
}

// restructReviewer is the migration script named "restruct reviewer table";
// it carries no state of its own.
type restructReviewer struct{}

// Up drops the table of the archived GithubReviewer model and then runs
// AutoMigrate for reviewer20240711. The first error encountered is returned;
// if DropTables fails, AutoMigrate is not attempted.
func (*restructReviewer) Up(basicRes context.BasicRes) errors.Error {
	store := basicRes.GetDal()
	if dropErr := store.DropTables(&archived.GithubReviewer{}); dropErr != nil {
		return dropErr
	}
	return store.AutoMigrate(&reviewer20240711{})
}

// Version returns the numeric version of this migration script.
func (*restructReviewer) Version() uint64 {
	const version uint64 = 20240710142104
	return version
}

// Name returns the human-readable description of this migration script.
func (*restructReviewer) Name() string {
	const name = "restruct reviewer table"
	return name
}
Loading

0 comments on commit 3231b38

Please sign in to comment.