-
Notifications
You must be signed in to change notification settings - Fork 23
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: add support for shallow cloning repos #363
base: main
Are you sure you want to change the base?
Conversation
Signed-off-by: Mmadu Manasseh <[email protected]>
ab10be3
to
0837db6
Compare
Temporary image available at |
Signed-off-by: Mmadu Manasseh <[email protected]>
Signed-off-by: Mmadu Manasseh <[email protected]>
pkg/config/config.go
Outdated
VcsUploadUrl string `mapstructure:"vcs-upload-url"` // github enterprise upload URL | ||
VcsToken string `mapstructure:"vcs-token"` | ||
VcsType string `mapstructure:"vcs-type"` | ||
EnableShallowClone bool `mapstructure:"enable-shallow-clone"` |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should prefix the key with `vcs-` to make it clear what this configures.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@djeebus I've used `RepoShallowClone` for this instead — what do you think?
I feel `Vcs`-prefixed variables are for VCS configs, and this is not "completely" one of them.
@MeNsaaH we got caught by missing dependencies several times (e.g. had to implement a kustomize walker) |
a52748f
to
2c06ea0
Compare
Signed-off-by: Mmadu Manasseh <[email protected]>
2c06ea0
to
acd97a9
Compare
Signed-off-by: Mmadu Manasseh <[email protected]>
a80d3ef
to
012a083
Compare
Mergecat's ReviewClick to read mergecats review!😼 Mergecat review of pkg/utils.go@@ -16,6 +16,7 @@ func Pointer[T interface{}](item T) *T {
}
func WipeDir(dir string) {
+ log.Debug().Str("path", dir).Msg("wiping path")
if err := os.RemoveAll(dir); err != nil {
log.Error().
Err(err). Feedback & Suggestions:
😼 Mergecat review of pkg/git/manager.go@@ -22,8 +22,11 @@ func NewRepoManager(cfg config.ServerConfig) *RepoManager {
return &RepoManager{cfg: cfg}
}
-func (rm *RepoManager) Clone(ctx context.Context, cloneUrl, branchName string) (*Repo, error) {
+func (rm *RepoManager) Clone(ctx context.Context, cloneUrl, branchName string, shallow bool) (*Repo, error) {
repo := New(rm.cfg, cloneUrl, branchName)
+ if shallow {
+ repo.Shallow = true
+ }
if err := repo.Clone(ctx); err != nil {
return nil, errors.Wrap(err, "failed to clone repository") Feedback & Suggestions:
😼 Mergecat review of cmd/root.go@@ -119,6 +119,9 @@ func init() {
newStringOpts().
withDefault("kubechecks again"))
stringSliceFlag(flags, "additional-apps-namespaces", "Additional namespaces other than the ArgoCDNamespace to monitor for applications.")
+ boolFlag(flags, "repo-shallow-clone", "Enable shallow cloning for all git repos.",
+ newBoolOpts().
+ withDefault(false))
panicIfError(viper.BindPFlags(flags))
setupLogOutput() Feedback & Suggestions:
😼 Mergecat review of pkg/events/check.go@@ -55,7 +55,7 @@ type CheckEvent struct {
}
type repoManager interface {
- Clone(ctx context.Context, cloneURL, branchName string) (*git.Repo, error)
+ Clone(ctx context.Context, cloneURL, branchName string, shallow bool) (*git.Repo, error)
}
func generateMatcher(ce *CheckEvent, repo *git.Repo) error {
@@ -192,7 +192,7 @@ func (ce *CheckEvent) getRepo(ctx context.Context, cloneURL, branchName string)
return repo, nil
}
- repo, err = ce.repoManager.Clone(ctx, cloneURL, branchName)
+ repo, err = ce.repoManager.Clone(ctx, cloneURL, branchName, ce.ctr.Config.RepoShallowClone)
if err != nil {
return nil, errors.Wrap(err, "failed to clone repo")
} Feedback & Suggestions:
😼 Mergecat review of pkg/config/config.go@@ -79,6 +79,7 @@ type ServerConfig struct {
MonitorAllApplications bool `mapstructure:"monitor-all-applications"`
OpenAIAPIToken string `mapstructure:"openai-api-token"`
RepoRefreshInterval time.Duration `mapstructure:"repo-refresh-interval"`
+ RepoShallowClone bool `mapstructure:"repo-shallow-clone"`
SchemasLocations []string `mapstructure:"schemas-location"`
ShowDebugInfo bool `mapstructure:"show-debug-info"`
TidyOutdatedCommentsMode string `mapstructure:"tidy-outdated-comments-mode"` Feedback & Suggestions:
😼 Mergecat review of localdev/kubechecks/values.yaml@@ -22,16 +22,17 @@ configMap:
#
# KUBECHECKS_LABEL_FILTER: "test" # On your PR/MR, prefix this with "kubechecks:"
# KUBECHECKS_SCHEMAS_LOCATION: https://github.com/zapier/kubecheck-schemas.git
+ KUBECHECKS_REPO_REFRESH_INTERVAL: 30s
KUBECHECKS_TIDY_OUTDATED_COMMENTS_MODE: "delete"
KUBECHECKS_ENABLE_CONFTEST: "false"
-
+ KUBECHECKS_REPO_SHALLOW_CLONE: "true"
deployment:
annotations:
reloader.stakater.com/auto: "true"
image:
- pullPolicy: Never
+ pullPolicy: IfNotPresent
name: "kubechecks"
tag: ""
Feedback & Suggestions:
😼 Mergecat review of docs/usage.md@@ -70,6 +70,7 @@ The full list of supported environment variables is described below:
|`KUBECHECKS_POLICIES_LOCATION`|Sets rego policy locations to be used for every check request. Can be common path inside the repos being checked or git urls in either git or http(s) format.|`[./policies]`|
|`KUBECHECKS_REPLAN_COMMENT_MSG`|comment message which re-triggers kubechecks on PR.|`kubechecks again`|
|`KUBECHECKS_REPO_REFRESH_INTERVAL`|Interval between static repo refreshes (for schemas and policies).|`5m`|
+|`KUBECHECKS_REPO_SHALLOW_CLONE`|Enable shallow cloning for all git repos.|`false`|
|`KUBECHECKS_SCHEMAS_LOCATION`|Sets schema locations to be used for every check request. Can be a common path on the host or git urls in either git or http(s) format.|`[]`|
|`KUBECHECKS_SHOW_DEBUG_INFO`|Set to true to print debug info to the footer of MR comments.|`false`|
|`KUBECHECKS_TIDY_OUTDATED_COMMENTS_MODE`|Sets the mode to use when tidying outdated comments. One of hide, delete.|`hide`| Feedback & Suggestions:
😼 Mergecat review of cmd/locations_test.go@@ -19,7 +19,7 @@ type fakeCloner struct {
err error
}
-func (f *fakeCloner) Clone(_ context.Context, cloneUrl, branchName string) (*git.Repo, error) {
+func (f *fakeCloner) Clone(_ context.Context, cloneUrl, branchName string, shallow bool) (*git.Repo, error) {
f.cloneUrl = cloneUrl
f.branchName = branchName
return f.result, f.err
@@ -43,7 +43,7 @@ func TestMaybeCloneGitUrl_NonGitUrl(t *testing.T) {
tc := tc
t.Run(tc.name, func(t *testing.T) {
fc := &fakeCloner{result: nil, err: nil}
- actual, err := maybeCloneGitUrl(ctx, fc, time.Duration(0), tc.input, testUsername)
+ actual, err := maybeCloneGitUrl(ctx, fc, time.Duration(0), tc.input, testUsername, false)
require.NoError(t, err)
assert.Equal(t, "", fc.branchName)
assert.Equal(t, "", fc.cloneUrl)
@@ -137,7 +137,7 @@ func TestMaybeCloneGitUrl_HappyPath(t *testing.T) {
tc := tc
t.Run(tc.name, func(t *testing.T) {
fc := &fakeCloner{result: &git.Repo{Directory: testRoot}, err: nil}
- actual, err := maybeCloneGitUrl(ctx, fc, time.Duration(0), tc.input, testUsername)
+ actual, err := maybeCloneGitUrl(ctx, fc, time.Duration(0), tc.input, testUsername, false)
require.NoError(t, err)
assert.Equal(t, tc.expected.branch, fc.branchName)
assert.Equal(t, tc.expected.cloneUrl, fc.cloneUrl)
@@ -165,7 +165,7 @@ func TestMaybeCloneGitUrl_URLError(t *testing.T) {
tc := tc
t.Run(tc.name, func(t *testing.T) {
fc := &fakeCloner{result: &git.Repo{Directory: testRoot}, err: nil}
- result, err := maybeCloneGitUrl(ctx, fc, time.Duration(0), tc.input, testUsername)
+ result, err := maybeCloneGitUrl(ctx, fc, time.Duration(0), tc.input, testUsername, false)
require.ErrorContains(t, err, tc.expected)
require.Equal(t, "", result)
})
@@ -193,7 +193,7 @@ func TestMaybeCloneGitUrl_CloneError(t *testing.T) {
defer cancel()
fc := &fakeCloner{result: &git.Repo{Directory: testRoot}, err: tc.cloneError}
- result, err := maybeCloneGitUrl(ctx, fc, time.Duration(0), tc.input, testUsername)
+ result, err := maybeCloneGitUrl(ctx, fc, time.Duration(0), tc.input, testUsername, false)
require.ErrorContains(t, err, tc.expected)
require.Equal(t, "", result)
}) Feedback & Suggestions:
😼 Mergecat review of cmd/locations.go@@ -18,7 +18,7 @@ import (
func processLocations(ctx context.Context, ctr container.Container, locations []string) error {
for index, location := range locations {
- if newLocation, err := maybeCloneGitUrl(ctx, ctr.RepoManager, ctr.Config.RepoRefreshInterval, location, ctr.VcsClient.Username()); err != nil {
+ if newLocation, err := maybeCloneGitUrl(ctx, ctr.RepoManager, ctr.Config.RepoRefreshInterval, location, ctr.VcsClient.Username(), ctr.Config.RepoShallowClone); err != nil {
return errors.Wrapf(err, "failed to clone %q", location)
} else if newLocation != "" {
locations[index] = newLocation
@@ -31,12 +31,12 @@ func processLocations(ctx context.Context, ctr container.Container, locations []
}
type cloner interface {
- Clone(ctx context.Context, cloneUrl, branchName string) (*git.Repo, error)
+ Clone(ctx context.Context, cloneUrl, branchName string, shallow bool) (*git.Repo, error)
}
var ErrCannotUseQueryWithFilePath = errors.New("relative and absolute file paths cannot have query parameters")
-func maybeCloneGitUrl(ctx context.Context, repoManager cloner, repoRefreshDuration time.Duration, location, vcsUsername string) (string, error) {
+func maybeCloneGitUrl(ctx context.Context, repoManager cloner, repoRefreshDuration time.Duration, location, vcsUsername string, shallow bool) (string, error) {
result := strings.SplitN(location, "?", 2)
if !isGitURL(result[0]) {
if len(result) > 1 {
@@ -51,7 +51,7 @@ func maybeCloneGitUrl(ctx context.Context, repoManager cloner, repoRefreshDurati
}
cloneUrl := repoUrl.CloneURL(vcsUsername)
- repo, err := repoManager.Clone(ctx, cloneUrl, query.Get("branch"))
+ repo, err := repoManager.Clone(ctx, cloneUrl, query.Get("branch"), shallow)
if err != nil {
return "", errors.Wrap(err, "failed to clone")
} Feedback & Suggestions:
😼 Mergecat review of pkg/git/repo.go@@ -28,6 +28,7 @@ type Repo struct {
BranchName string
Config config.ServerConfig
CloneURL string
+ Shallow bool
// exposed state
Directory string
@@ -46,11 +47,17 @@ func New(cfg config.ServerConfig, cloneUrl, branchName string) *Repo {
}
func (r *Repo) Clone(ctx context.Context) error {
+ if r.Shallow {
+ return r.shallowClone(ctx)
+ }
+
var err error
- r.Directory, err = os.MkdirTemp("/tmp", "kubechecks-repo-")
- if err != nil {
- return errors.Wrap(err, "failed to make temp dir")
+ if r.Directory == "" {
+ r.Directory, err = os.MkdirTemp("/tmp", "kubechecks-repo-")
+ if err != nil {
+ return errors.Wrap(err, "failed to make temp dir")
+ }
}
log.Info().
@@ -85,6 +92,63 @@ func (r *Repo) Clone(ctx context.Context) error {
return nil
}
+func (r *Repo) shallowClone(ctx context.Context) error {
+ var err error
+
+ if r.Directory == "" {
+ r.Directory, err = os.MkdirTemp("/tmp", "kubechecks-repo-")
+ if err != nil {
+ return errors.Wrap(err, "failed to make temp dir")
+ }
+ }
+
+ log.Info().
+ Str("temp-dir", r.Directory).
+ Str("clone-url", r.CloneURL).
+ Str("branch", r.BranchName).
+ Msg("cloning git repo")
+
+ // Attempt to locally clone the repo based on the provided information stored within
+ _, span := tracer.Start(ctx, "ShallowCloneRepo")
+ defer span.End()
+
+ args := []string{"clone", r.CloneURL, r.Directory, "--depth", "1"}
+ cmd := r.execGitCommand(args...)
+ out, err := cmd.CombinedOutput()
+ if err != nil {
+ log.Error().Err(err).Msgf("unable to clone repository, %s", out)
+ return err
+ }
+
+ if r.BranchName != "HEAD" {
+ // Fetch SHA
+ args = []string{"fetch", "origin", r.BranchName, "--depth", "1"}
+ cmd = r.execGitCommand(args...)
+ out, err = cmd.CombinedOutput()
+ if err != nil {
+ log.Error().Err(err).Msgf("unable to fetch %s repository, %s", r.BranchName, out)
+ return err
+ }
+ // Checkout SHA
+ args = []string{"checkout", r.BranchName}
+ cmd = r.execGitCommand(args...)
+ out, err = cmd.CombinedOutput()
+ if err != nil {
+ log.Error().Err(err).Msgf("unable to checkout branch %s repository, %s", r.BranchName, out)
+ return err
+ }
+ }
+
+ if log.Trace().Enabled() {
+ if err = filepath.WalkDir(r.Directory, printFile); err != nil {
+ log.Warn().Err(err).Msg("failed to walk directory")
+ }
+ }
+
+ log.Info().Msg("repo has been cloned")
+ return nil
+}
+
func printFile(s string, d fs.DirEntry, err error) error {
if err != nil {
return err
@@ -118,8 +182,24 @@ func (r *Repo) MergeIntoTarget(ctx context.Context, ref string) error {
attribute.String("sha", ref),
))
defer span.End()
+ merge_command := []string{"merge", ref}
+ // For shallow clones, we need to pull the ref into the repo
+ if r.Shallow {
+ ref = strings.TrimPrefix(ref, "origin/")
+ cmd := r.execGitCommand("fetch", "origin", fmt.Sprintf("%s:%s", ref, ref), "--depth", "1")
+ out, err := cmd.CombinedOutput()
+ if err != nil {
+ telemetry.SetError(span, err, "fetch origin ref")
+ log.Error().Err(err).Msgf("unable to fetch ref %s, %s", ref, out)
+ return err
+ }
+ // When merging shallow clones, we need to allow unrelated histories
+ // and use the "theirs" strategy to avoid conflicts
+ // cons of this is that it may not be entirely accurate and may overwrite changes in the target branch
+ merge_command = []string{"merge", ref, "--allow-unrelated-histories", "-X", "theirs"}
+ }
- cmd := r.execGitCommand("merge", ref)
+ cmd := r.execGitCommand(merge_command...)
out, err := cmd.CombinedOutput()
if err != nil {
telemetry.SetError(span, err, "merge commit into branch")
@@ -131,6 +211,15 @@ func (r *Repo) MergeIntoTarget(ctx context.Context, ref string) error {
}
func (r *Repo) Update(ctx context.Context) error {
+ // Since we're shallow cloning, to update we need to wipe the directory and re-clone
+ if r.Shallow {
+ r.Wipe()
+ err := os.Mkdir(r.Directory, 0700)
+ if err != nil {
+ return errors.Wrap(err, "failed to create repo directory")
+ }
+ return r.Clone(ctx)
+ }
cmd := r.execGitCommand("pull")
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stdout Feedback & Suggestions:
Dependency Review — Click to read mergecat's review! No suggestions found |
This adds support for shallow cloning repos.
With every hook/request, kubechecks clones the full repo. This can become very expensive for large repositories.
Shallow cloning allows us to limit the cloning to just the commits we need.