diff --git a/cmd/gerritbot/gerritbot.go b/cmd/gerritbot/gerritbot.go index 0953ee8f8d..f3b60f4679 100644 --- a/cmd/gerritbot/gerritbot.go +++ b/cmd/gerritbot/gerritbot.go @@ -28,6 +28,7 @@ import ( "cloud.google.com/go/compute/metadata" "github.com/google/go-github/v48/github" "github.com/gregjones/httpcache" + "golang.org/x/build/cmd/gerritbot/internal/rules" "golang.org/x/build/gerrit" "golang.org/x/build/internal/https" "golang.org/x/build/internal/secret" @@ -729,7 +730,8 @@ func (b *bot) importGerritChangeFromPR(ctx context.Context, pr *github.PullReque pushOpts = "%ready" } - if cl == nil { + newCL := cl == nil + if newCL { // Add this informational message only on CL creation. msg := fmt.Sprintf("This Gerrit CL corresponds to GitHub PR %s.\n\nAuthor: %s", prShortLink(pr), author) pushOpts += ",m=" + url.QueryEscape(msg) @@ -766,7 +768,57 @@ Please visit Gerrit at %s. * You should word wrap the PR description at ~76 characters unless you need longer lines (e.g., for tables or URLs). * See the [Sending a change via GitHub](https://go.dev/doc/contribute#sending_a_change_github) and [Reviews](https://go.dev/doc/contribute#reviews) sections of the Contribution Guide as well as the [FAQ](https://github.com/golang/go/wiki/GerritBot/#frequently-asked-questions) for details.`, pr.Head.GetSHA(), changeURL) - return b.postGitHubMessageNoDup(ctx, repo.GetOwner().GetLogin(), repo.GetName(), pr.GetNumber(), "", msg) + err = b.postGitHubMessageNoDup(ctx, repo.GetOwner().GetLogin(), repo.GetName(), pr.GetNumber(), "", msg) + if err != nil { + return err + } + + if newCL { + // Check if we spot any problems with the CL according to our internal + // set of rules, and if so, add an unresolved comment to Gerrit. + // If the author responds to this, it also helps a reviewer see the author has + // registered for a Gerrit account and knows how to reply in Gerrit. + // TODO: see CL 509135 for possible follow-ups, including possibly always + // asking explicitly if the CL is ready for review even if there are no problems, + // and possibly reminder comments followed by ultimately automatically + // abandoning the CL if the author never replies. + change, err := rules.ParseCommitMessage(repo.GetName(), cmsg) + if err != nil { + return fmt.Errorf("failed to parse commit message for %s: %v", prShortLink(pr), err) + } + problems := rules.Check(change) + if len(problems) > 0 { + summary := rules.FormatResults(problems) + // If needed, summary contains advice for how to edit the commit message. + msg := fmt.Sprintf("Hello %v, I've spotted some possible problems.\n\n"+ + "These findings are based on simple heuristics. If a finding appears wrong, briefly reply here saying so. "+ + "Otherwise, please address any problems and update the GitHub PR. "+ + "When complete, mark this comment as 'Done' and click the [blue 'Reply' button](https://github.com/golang/go/wiki/GerritBot#i-left-a-reply-to-a-comment-in-gerrit-but-no-one-but-me-can-see-it) above.\n\n"+ + "%s\n\n"+ + "(In general for Gerrit code reviews, the CL author is expected to close out each piece of feedback by "+ + "marking it as 'Done' if implemented as suggested or otherwise reply to each review comment. "+ + "See the [Review](https://go.dev/doc/contribute#review) section of the Contributing Guide for details.)", + author, summary) + + gcl, err := b.gerritChangeForPR(pr) + if err != nil { + return fmt.Errorf("could not look up CL after creation for %s: %v", prShortLink(pr), err) + } + unresolved := true + ri := gerrit.ReviewInput{ + Comments: map[string][]gerrit.CommentInput{ + "/PATCHSET_LEVEL": {{Message: msg, Unresolved: &unresolved}}, + }, + } + // TODO: instead of gcl.ID, we might need to do something similar to changeTriple in cmd/coordinator. + err = b.gerritClient.SetReview(ctx, gcl.ChangeID, gcl.ID, ri) + if err != nil { + return fmt.Errorf("could not add findings comment to CL for %s: %v", prShortLink(pr), err) + } + } + } + + return nil } var changeIdentRE = regexp.MustCompile(`(?m)^Change-Id: (I[0-9a-fA-F]{40})\n?`) diff --git a/cmd/gerritbot/internal/rules/rules.go b/cmd/gerritbot/internal/rules/rules.go new file mode 100644 index 0000000000..00a0d007ee --- /dev/null +++ b/cmd/gerritbot/internal/rules/rules.go @@ -0,0 +1,446 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package rules specifies a simple set of rules for checking GitHub PRs or Gerrit CLs +// for certain common mistakes, like no package in the first line of the commit message +// or having long lines in the commit message body. +// +// A rule is primarily defined via a function that takes a Change (CL or PR) as input +// and reports zero or 1 findings, which is just a string (usually 1-2 short sentences). +// A rule can also optionally return a note, which might be auxiliary advice such as +// how to edit the commit message. +// +// The FormatResults function renders the results into markdown. +// Each finding is shown to the user, currently in a numbered list of findings. +// The auxiliary notes are deduplicated, so that for example a set of rules +// looking for problems in a commit message can all return the same editing advice +// but that advice is then only shown to the user only once. +// +// For example, for a commit message with a first line of: +// +// fmt: Improve foo and bar. +// +// That would currently trigger two rules, with an example formatted result of +// the following (including the deduplicated advice at the end): +// +// Possible problems detected: +// 1. The first word in the commit title after the package should be a +// lowercase English word (usually a verb). +// 2. The commit title should not end with a period. +// +// To edit the commit message, see instructions [here](...). For guidance on commit +// messages for the Go project, see [here](...). +// +// Rules currently err on the side of simplicity and avoiding false positives. +// It is is intended to be straightforward to add a new rule. +// +// Rules can be arranged to trigger completely independently of one another, +// or alternatively a set of rules can optionally be arranged in groups that +// form a precedence based on order within the group, where at most one rule +// from that group will trigger. This can be helpful in cases like: +// - You'd like to have a "catch all" rule that should only trigger if another rule +// in the same group hasn't triggered. +// - You have a rule that spots a more general problem and rules that spot +// more specific or pickier variant of the same problem and want to have +// the general problem reported if applicable without also reporting +// pickier findings that would seem redundant. +// - A subset of rules that are otherwise intended to be mutually exclusive. +package rules + +import ( + "fmt" + "strings" +) + +// rule defines a single rule that can report a finding. +// See the package comment for an overview. +type rule struct { + // name is a short internal rule name that don't expect to tweak frequently. + // We don't show it to users, but do use in tests. + name string + + // f is the rule function that reports a single finding and optionally + // an auxiliary note, which for example could be advice on how to edit the commit message. + // Notes are deduplicated across different rules, and ignored for a given rule if there is no finding. + f func(change Change) (finding string, note string) + + // skip lists repos to skip for this rule. + skip []string + + // only lists the only repos that this rule will run against. + only []string +} + +// ruleGroups defines our live set of rules. It is a [][]rule +// because the individual rules are arranged into groups of rules +// that are mutually exclusive, where the first triggering rule wins +// within a []rule in ruleGroups. See the package comment for details. +var ruleGroups = [][]rule{ + { + // We have two rules in this rule group, and hence we report at most one of them. + // The second (pickier) rule is checked only if the first doesn't trigger. + { + name: "title: no package found", + skip: []string{"proposal"}, // We allow the proposal repo to have an irregular title. + f: func(change Change) (finding string, note string) { + component, example := packageExample(change.Repo) + finding = fmt.Sprintf("The commit title should start with the primary affected %s name followed by a colon, like \"%s\".", component, example) + start, _, ok := strings.Cut(change.Title, ":") + if !ok { + // No colon. + return finding, commitMessageAdvice + } + var pattern string + switch change.Repo { + default: + // A single package starts with a lowercase ASCII and has no spaces prior to the colon. + pattern = `^[a-z][^ ]+$` + case "website": + // Allow leading underscore (e.g., "_content"). + pattern = `^[a-z_][^ ]+$` + case "vscode-go": + // Allow leading uppercase and period (e.g., ".github/workflow"). + pattern = `^[a-zA-Z.][^ ]+$` + } + if match(pattern, start) { + return "", "" + } + if match(`^(README|DEBUG)`, start) { + // Rare, but also allow a root README or DEBUG file as a component. + return "", "" + } + pkgs := strings.Split(start, ", ") + if match(`^[a-z]`, start) && len(pkgs) > 1 && !matchAny(` `, pkgs) { + // We also allow things like "go/types, types2: ..." (but not "hello, fun world: ..."). + return "", "" + } + return finding, commitMessageAdvice + }, + }, + { + name: "title: no colon then single space after package", + skip: []string{"proposal"}, // We allow the proposal repo to have an irregular title. + f: func(change Change) (finding string, note string) { + component, example := packageExample(change.Repo) + finding = fmt.Sprintf("The %s in the commit title should be followed by a colon and single space, like \"%s\".", component, example) + if !match(`[^ ]: [^ ]`, change.Title) { + return finding, commitMessageAdvice + } + return "", "" + }, + }, + }, + { + { + name: "title: no lowercase word after a first colon", + skip: []string{"proposal"}, // We allow the proposal repo to have an irregular title. + f: func(change Change) (finding string, note string) { + component, _ := packageExample(change.Repo) + finding = fmt.Sprintf("The first word in the commit title after the %s should be a lowercase English word (usually a verb).", component) + _, after, ok := strings.Cut(change.Title, ":") + if !ok { + // No colon. Someone who doesn't have any colon probably can use a reminder about what comes next. + return finding, commitMessageAdvice + } + if !match(`^[ ]*[a-z]+\b`, after) { + // Probably not a lowercase English verb. + return finding, commitMessageAdvice + } + return "", "" + }, + }, + }, + { + { + name: "title: ends with period", + f: func(change Change) (finding string, note string) { + finding = "The commit title should not end with a period." + if len(change.Title) > 0 && change.Title[len(change.Title)-1] == '.' { + return finding, commitMessageAdvice + } + return "", "" + }, + }, + }, + { + // We have two rules in this group, and hence we report at most one of them. + // The second rule is pickier. + { + name: "body: short", + f: func(change Change) (finding string, note string) { + finding = "The commit message body is %s. " + + "That can be OK if the change is trivial like correcting spelling or fixing a broken link, " + + "but usually the description should provide context for the change and explain what it does in complete sentences." + if !mightBeTrivial(change) { + size := len([]rune(change.Body)) + switch { + case size == 0: + return fmt.Sprintf(finding, "missing"), commitMessageAdvice + case size < 24: // len("Updates golang/go#12345") == 23 + return fmt.Sprintf(finding, "very brief"), commitMessageAdvice + } + } + return "", "" + }, + }, + { + name: "body: no sentence candidates found", + f: func(change Change) (finding string, note string) { + finding = "Are you describing the change in complete sentences with correct punctuation in the commit message body, including ending sentences with periods?" + if !mightBeTrivial(change) && !match("[a-zA-Z0-9\"'`)][.:?!)]( |\\n|$)", change.Body) { + // A complete English sentence usually ends with an alphanumeric immediately followed by a terminating punctuation. + // (This is an approximate test, but currently has a very low false positive rate. Brief manual checking suggests + // ~zero false positives in a corpus of 1000 CLs). + return finding, commitMessageAdvice + } + return "", "" + }, + }, + }, + { + { + name: "body: long lines", + f: func(change Change) (finding string, note string) { + finding = "Lines in the commit message should be wrapped at ~76 characters unless needed for things like URLs or tables. You have a %d character line." + // Check if something smells like a table, ASCII art, or benchstat output. + if match("----|____|[\u2500-\u257F]", change.Body) || match(` ± [0-9.]+%`, change.Body) { + // This might be something that is allowed to be wide. + // (The Unicode character range above covers box drawing characters, which is probably overkill). + // We are lenient here and don't check the rest of the body, + // mainly to be friendly and also we don't want to guess + // line by line whether something looks like a table. + return "", "" + } + longest := 0 + for _, line := range splitLines(change.Body) { + if match(`https?://|www\.|\.(com|org|dev)`, line) || matchCount(`[/\\]`, line) > 4 { + // Might be a long url or other path. + continue + } + l := len([]rune(line)) + if longest < l { + longest = l + } + } + if longest > 78 { // Official guidance on wiki is "~76". + return fmt.Sprintf(finding, longest), commitMessageAdvice + } + return "", "" + }, + }, + }, + { + { + name: "body: might use markdown", + f: func(change Change) (finding string, note string) { + finding = "Are you using markdown? Markdown should not be used to augment text in the commit message." + if match(`(?i)markdown`, change.Title) || match(`(?i)markdown`, change.Body) { + // Could be a markdown-related fix. + return "", "" + } + if matchCount("(?m)^```", change.Body) >= 2 { + // Looks like a markdown code block. + return finding, commitMessageAdvice + } + // Now look for markdown backticks (which might be most common offender), + // but with a mild attempt to avoid flagging a Go regexp or other Go raw string. + if !match(`regex|string`, change.Title) { + for _, line := range splitLines(change.Body) { + if strings.ContainsAny(line, "={}()[]") || match(`regex|string`, line) { + // This line might contain Go code. This is fairly lenient, but + // in practice, if someone uses markdown once in a CL, they tend to use + // it multiple times, so we often have multiple chances to find a hit. + continue + } + if match("`.+`", line) { + // Could be a markdown backtick. + return finding, commitMessageAdvice + } + } + } + if match(`\[.+]\(https?://.+\)`, change.Body) { + // Looks like a markdown link. (Sorry, currently our ugliest regexp). + return finding, commitMessageAdvice + } + return "", "" + }, + }, + }, + { + { + name: "body: still contains PR instructions", + f: func(change Change) (finding string, note string) { + finding = "Do you still have the GitHub PR instructions in your commit message text? The PR instructions should be deleted once you have applied them." + if strings.Contains(change.Body, "Delete these instructions once you have read and applied them") { + return finding, commitMessageAdvice + } + return "", "" + }, + }, + }, + { + { + name: "body: contains Signed-off-by", + f: func(change Change) (finding string, note string) { + finding = "Please do not use 'Signed-off-by'. We instead rely on contributors signing CLAs." + if match(`(?mi)^Signed-off-by: `, change.Body) { + return finding, commitMessageAdvice + } + return "", "" + }, + }, + }, + { + // We have three rules in this group, and hence we report at most one of them. + // The three rules get progressively pickier. + // We exempt the proposal repo from these rules because almost all GitHub PRs + // for the proposal repo are for typos or similar. + { + name: "body: no bug reference candidate found", + skip: []string{"proposal"}, + f: func(change Change) (finding string, note string) { + finding = "You usually need to reference a bug number for all but trivial or cosmetic fixes. " + + "%s at the end of the commit message. Should you have a bug reference?" + if mightBeTrivial(change) { + return "", "" + } + var bugPattern string + switch usesTracker(change.Repo) { + case mainTracker: + bugPattern = `#\d{4}` + default: + bugPattern = `#\d{2}` + } + if !match(bugPattern, change.Body) { + return fmt.Sprintf(finding, bugExamples(change.Repo)), commitMessageAdvice + } + return "", "" + }, + }, + { + name: "body: bug format looks incorrect", + skip: []string{"proposal"}, + f: func(change Change) (finding string, note string) { + finding = "Do you have the right bug reference format? %s (without a period) at the end of the commit message." + if mightBeTrivial(change) { + return "", "" + } + var bugPattern string + switch { + case change.Repo == "go": + bugPattern = `#\d{4,}$` + case usesTracker(change.Repo) == mainTracker: + bugPattern = `golang/go#\d{4,}$` + case usesTracker(change.Repo) == ownTracker: + bugPattern = fmt.Sprintf(`golang/%s#\d{2,}$`, change.Repo) + default: + bugPattern = `golang/go#\d{4,}$` + } + if !match(`(?m)^(Fixes|Updates|For|Closes|Resolves) `+bugPattern, change.Body) { + return fmt.Sprintf(finding, bugExamples(change.Repo)), commitMessageAdvice + } + return "", "" + }, + }, + { + name: "body: no bug reference candidate at end", + skip: []string{"proposal"}, + f: func(change Change) (finding string, note string) { + // If this rule is running, it means it passed the earlier bug-related rules + // in this group, so we know there is what looks like a well-formed bug. + finding = "It looks like you have a properly formated bug reference, but the convention is to " + + "put bug references at the bottom of the commit message, even if a bug is also mentioned " + + "in the body of the message." + if mightBeTrivial(change) { + return "", "" + } + var bugPattern string + switch { + case usesTracker(change.Repo) == mainTracker: + bugPattern = `#\d{4}` + default: + bugPattern = `#\d{2}` + } + lines := splitLines(change.Body) + if len(lines) > 0 && !match(bugPattern, lines[len(lines)-1]) { + return finding, commitMessageAdvice + } + return "", "" + }, + }, + }, +} + +// Auxiliary advice. +var commitMessageAdvice = "To edit the commit message, see instructions [here](https://github.com/golang/go/wiki/GerritBot/#how-does-gerritbot-determine-the-final-commit-message). " + + "For guidelines on commit messages for the Go project, see [here](https://go.dev/doc/contribute#commit_messages)." + +// mightBeTrivial reports if a change looks like something +// where a commit body is often allowed to be skipped, such +// as correcting spelling, fixing a broken link, or +// adding or correcting an example. +func mightBeTrivial(change Change) bool { + return match(`(?i)typo|spell|grammar|grammatical|comment|readme|document|\bdocs?\b|example|(fix|correct|broken|wrong).*(link|url)`, change.Title) +} + +// bugExamples returns a snippet of text that includes example bug references +// formatted as expected for a repo. +func bugExamples(repo string) string { + switch { + case repo == "go": + return "For this repo, the format is usually 'Fixes #12345' or 'Updates #12345'" + case usesTracker(repo) == mainTracker: + return fmt.Sprintf("For the %s repo, the format is usually 'Fixes golang/go#12345' or 'Updates golang/go#12345'", repo) + case usesTracker(repo) == ownTracker: + return fmt.Sprintf("For the %s repo, the format is usually 'Fixes golang/%s#1234' or 'Updates golang/%s#1234'", repo, repo, repo) + default: + // We don't know how issues should be referenced for this repo, including this might be + // some future created after these rules were last updated, so be a bit vaguer. + return "For most repos outside the main go repo, the format is usually 'Fixes #12345' or 'Updates #12345'" + } +} + +// packageExample returns an example usage of a package/component in a commit title +// for a given repo, along with what to call the leading words before the first +// colon ("package" vs. "component"). +func packageExample(repo string) (component string, example string) { + switch repo { + default: + return "package", "net/http: improve [...]" + case "website": + return "component", "_content/doc/go1.21: fix [...]" + case "vscode-go": + return "component", "src/goInstallTools: improve [...]" + } +} + +// tracker is the issue tracker used by a repo. +type tracker int + +const ( + mainTracker tracker = iota // uses the main golang/go issue tracker. + ownTracker // uses a repo-specific tracker, like golang/vscode-go. + unknownTracker // we don't know what tracker is used, which usually means the main tracker is used. +) + +// usesTracker reports if a repo uses the main golang/go tracker or a repo-specific tracker. +// Callers should deal gracefully with unknownTracker being reported (such as by giving less precise +// advice that is still generally useful), which might happen with a less commonly used repo or some future repo. +func usesTracker(repo string) tracker { + // It's OK for a repo to be missing from our list. If something is missing, either it is not frequently used + // and hence hopefully not a major problem, or in the rare case we are missing something frequently used, + // this can be updated if someone complains. + // TODO: not immediately clear where vulndb should go. In practice, it seems to use the main golang/go tracker, + // but it also has its own tracker (which might just be for security reports?). Leaving unspecified for now, + // which results in vaguer advice. + switch repo { + case "go", "arch", "build", "crypto", "debug", "exp", "image", "mobile", "mod", "net", "perf", "pkgsite", "playground", + "proposal", "review", "sync", "sys", "telemetry", "term", "text", "time", "tools", "tour", "vuln", "website", "xerrors": + return mainTracker + case "vscode-go": + return ownTracker + default: + return unknownTracker + } +} diff --git a/cmd/gerritbot/internal/rules/rules_test.go b/cmd/gerritbot/internal/rules/rules_test.go new file mode 100644 index 0000000000..1266acc8bd --- /dev/null +++ b/cmd/gerritbot/internal/rules/rules_test.go @@ -0,0 +1,589 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package rules + +import ( + "strings" + "testing" + + "github.com/google/go-cmp/cmp" +) + +func TestFormatRuleResults(t *testing.T) { + tests := []struct { + title string + repo string // if empty string, treated as "go" repo + body string + want string + }{ + { + title: `fmt: improve some things`, // We consider this a good commit message title. + body: goodCommitBody, + want: ``, + }, + { + title: `a bad commit message title.`, + body: goodCommitBody, + want: `Possible problems detected: + 1. The commit title should start with the primary affected package name followed by a colon, like "net/http: improve [...]". + 2. The first word in the commit title after the package should be a lowercase English word (usually a verb). + 3. The commit title should not end with a period. + +To edit the commit message, see instructions [here](https://github.com/golang/go/wiki/GerritBot/#how-does-gerritbot-determine-the-final-commit-message). For guidelines on commit messages for the Go project, see [here](https://go.dev/doc/contribute#commit_messages). +`, + }, + { + title: `A bad vscode-go commit title`, // This verifies we complain about a "component" rather than "package". + repo: "vscode-go", + body: "This includes a bad bug format for vscode-go repo.\nFixes #1234", + want: `Possible problems detected: + 1. The commit title should start with the primary affected component name followed by a colon, like "src/goInstallTools: improve [...]". + 2. The first word in the commit title after the component should be a lowercase English word (usually a verb). + 3. Do you have the right bug reference format? For the vscode-go repo, the format is usually 'Fixes golang/vscode-go#1234' or 'Updates golang/vscode-go#1234' (without a period) at the end of the commit message. + +To edit the commit message, see instructions [here](https://github.com/golang/go/wiki/GerritBot/#how-does-gerritbot-determine-the-final-commit-message). For guidelines on commit messages for the Go project, see [here](https://go.dev/doc/contribute#commit_messages). +`, + }, + { + title: goodCommitTitle, + body: "This commit body is missing a bug reference.", + want: `Possible problems detected: + 1. You usually need to reference a bug number for all but trivial or cosmetic fixes. For this repo, the format is usually 'Fixes #12345' or 'Updates #12345' at the end of the commit message. Should you have a bug reference? + +To edit the commit message, see instructions [here](https://github.com/golang/go/wiki/GerritBot/#how-does-gerritbot-determine-the-final-commit-message). For guidelines on commit messages for the Go project, see [here](https://go.dev/doc/contribute#commit_messages). +`, + }, + } + for _, tt := range tests { + t.Run("title "+tt.title, func(t *testing.T) { + commit := commitMessage(tt.title, tt.body, goodCommitFooters) + repo := "go" + if tt.repo != "" { + repo = tt.repo + } + change, err := ParseCommitMessage(repo, commit) + if err != nil { + t.Fatalf("ParseCommitMessage failed: %v", err) + } + results := Check(change) + got := FormatResults(results) + t.Log("FormatResults:\n" + got) + if diff := cmp.Diff(tt.want, got); diff != "" { + t.Errorf("checkRules() mismatch (-want +got):\n%s", diff) + } + }) + } +} + +func TestTitleRules(t *testing.T) { + tests := []struct { + title string + repo string // if empty string, treated as "go" repo + body string + want []string + }{ + // We consider these good titles. + { + title: "fmt: good", + want: nil, + }, + { + title: "go/types, types2: good", + want: nil, + }, + { + title: "go/types, types2, types3: good", + want: nil, + }, + { + title: "fmt: improve & make: Good", + want: nil, + }, + { + title: "README: fix something", + want: nil, + }, + { + title: "_content/doc/go1.21: fix something", + repo: "website", + want: nil, + }, + { + title: "Fix a proposal", // We are lenient with proposal repo titles. + repo: "proposal", + want: nil, + }, + + // We consider these bad titles. + { + title: "bad.", + want: []string{ + "title: no package found", + "title: no lowercase word after a first colon", + "title: ends with period", + }, + }, + { + title: "bad", + want: []string{ + "title: no package found", + "title: no lowercase word after a first colon", + }, + }, + { + title: "fmt: bad.", + want: []string{ + "title: ends with period", + }, + }, + { + title: "Fmt: bad", + want: []string{ + "title: no package found", + }, + }, + { + title: "fmt: Bad", + want: []string{ + "title: no lowercase word after a first colon", + }, + }, + { + title: "fmt: bad", + want: []string{ + "title: no colon then single space after package", + }, + }, + { + title: "fmt: Bad", + want: []string{ + "title: no colon then single space after package", + "title: no lowercase word after a first colon", + }, + }, + { + title: "fmt:bad", + want: []string{ + "title: no colon then single space after package", + }, + }, + { + title: "fmt : bad", + want: []string{ + "title: no package found", + }, + }, + { + title: ": bad", + want: []string{ + "title: no package found", + }, + }, + { + title: " : bad", + want: []string{ + "title: no package found", + }, + }, + { + title: "go/types types2: bad", + want: []string{ + "title: no package found", + }, + }, + { + title: "a sentence, with a comma and colon: bad", + want: []string{ + "title: no package found", + }, + }, + { + title: "a sentence with a colon: and a wrongly placed package fmt: bad", + want: []string{ + "title: no package found", + }, + }, + { + title: "", + want: []string{ + "title: no package found", + "title: no lowercase word after a first colon", + }, + }, + + // We allow these titles (in interests of simplicity or leniency). + // TODO: are some of these considered an alternative good style? + { + title: "go/types,types2: we allow", + want: nil, + }, + { + title: "cmd/{compile,link}: we allow", + want: nil, + }, + { + title: "cmd/{compile, link}: we allow", + want: nil, + }, + } + for _, tt := range tests { + t.Run("title "+tt.title, func(t *testing.T) { + commit := commitMessage(tt.title, goodCommitBody, goodCommitFooters) + repo := "go" + if tt.repo != "" { + repo = tt.repo + } + change, err := ParseCommitMessage(repo, commit) + if err != nil { + t.Fatalf("ParseCommitMessage failed: %v", err) + } + results := Check(change) + + var got []string + for _, r := range results { + got = append(got, r.Name) + } + + if diff := cmp.Diff(tt.want, got); diff != "" { + t.Errorf("checkRules() mismatch (-want +got):\n%s", diff) + } + }) + } +} + +func TestBodyRules(t *testing.T) { + tests := []struct { + name string + title string // if empty string, we use goodCommitTitle + repo string // if empty string, treated as "go" repo + body string + want []string + }{ + // We consider these good bodies. + { + name: "good", + body: goodCommitBody, + want: nil, + }, + { + name: "good bug format for go repo", + repo: "go", + body: "This is This is body text.\n\nFixes #1234", + want: nil, + }, + { + name: "good bug format for tools repo", + repo: "tools", + body: "This is This is body text.\n\nFixes golang/go#1234", + want: nil, + }, + { + name: "good bug format for vscode-go", + repo: "vscode-go", + body: "This is This is body text.\n\nFixes golang/vscode-go#1234", + want: nil, + }, + { + name: "good bug format for unknown repo", + repo: "some-future-repo", + body: "This is This is body text.\n\nFixes golang/go#1234", + want: nil, + }, + { + name: "allowed long lines for benchstat output", + body: "Encode/format=json-48 1.718µ ± 1% 1.423µ ± 1% -17.20% (p=0.000 n=10)" + + strings.Repeat("Hello. ", 100) + "\n" + goodCommitBody, + want: nil, + }, + { + name: "a trival fix", + title: "fmt: fix spelling mistakes", + body: "", + want: nil, // we don't flag short body or missing bug reference because "spelling" is in title + }, + + // Now we consider some bad bodies. + // First, some basic mistakes. + { + name: "too short", + body: "Short body", + want: []string{ + "body: short", + "body: no bug reference candidate found", + }, + }, + { + name: "missing body", + body: "", + want: []string{ + "body: short", + "body: no bug reference candidate found", + }, + }, + { + name: "not word wrapped", + body: strings.Repeat("Hello. ", 100) + "\n" + goodCommitBody, + want: []string{ + "body: long lines", + }, + }, + { + name: "not a sentence", + body: "This is missing a period", + want: []string{ + "body: no sentence candidates found", + "body: no bug reference candidate found", + }, + }, + { + name: "Signed-off-by", + body: "Signed-off-by: bad\n\n" + goodCommitBody, + want: []string{ + "body: contains Signed-off-by", + }, + }, + { + name: "PR instructions", + body: "Delete these instructions once you have read and applied them.\n", + want: []string{ + "body: still contains PR instructions", + "body: no bug reference candidate found", + }, + }, + + // Next, mistakes in the repo-specific format or location of bug references. + { + name: "bad bug format for go repo", + repo: "go", + body: "This is body text.\n\nFixes golang/go#1234", + want: []string{ + "body: bug format looks incorrect", + }, + }, + { + name: "bad bug format for tools repo", + repo: "tools", + body: "This is body text.\n\nFixes #1234", + want: []string{ + "body: bug format looks incorrect", + }, + }, + { + name: "bad bug format for vscode-go", + repo: "vscode-go", + body: "This is body text.\n\nFixes #1234", + want: []string{ + "body: bug format looks incorrect", + }, + }, + { + name: "bad bug format for unknown repo", + repo: "some-future-repo", + body: "This is body text.\n\nFixes #1234", + want: []string{ + "body: bug format looks incorrect", + }, + }, + { + name: "bad bug location", + body: "This is body text.\nFixes #1234\nAnd a final line we should not have.", + want: []string{ + "body: no bug reference candidate at end", + }, + }, + + // We next have some good bodies that are markdown-ish, + // but we allow them. + { + name: "allowed markdown: mention regex in title", + title: "regexp: fix something", + body: "Example `.*`.\n" + goodCommitBody, + want: nil, + }, + { + name: "allowed markdown: mention regex in body", + body: "A regex `.*`.\n" + goodCommitBody, + want: nil, + }, + { + name: "allowed markdown: mention markdown", + body: "A markdown bug `and this is ok`.\n" + goodCommitBody, + want: nil, + }, + { + name: "allowed markdown: might be go code", + body: " s := `raw`\n" + goodCommitBody, + want: nil, + }, + + // Examples of using markdown that we flag. + { + name: "markdown backticks", + body: "A variable `foo`.\n" + goodCommitBody, + want: []string{ + "body: might use markdown", + }, + }, + { + name: "markdown block quote", + body: "Some code:\n```\nx := y\n```\n" + goodCommitBody, + want: []string{ + "body: might use markdown", + }, + }, + { + name: "markdown link", + body: "[click here](https://example.com)\n" + goodCommitBody, + want: []string{ + "body: might use markdown", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + title := goodCommitTitle + if tt.title != "" { + title = tt.title + } + commit := commitMessage(title, tt.body, goodCommitFooters) + repo := "go" + if tt.repo != "" { + repo = tt.repo + } + change, err := ParseCommitMessage(repo, commit) + if err != nil { + t.Fatalf("ParseCommitMessage failed: %v", err) + } + results := Check(change) + + var got []string + for _, r := range results { + got = append(got, r.Name) + } + + if diff := cmp.Diff(tt.want, got); diff != "" { + t.Errorf("checkRules() mismatch (-want +got):\n%s", diff) + } + }) + } +} + +func TestParseCommitMessage(t *testing.T) { + tests := []struct { + name string + repo string + text string + want Change + wantErr bool + }{ + // Bad examples. + { + name: "not enough lines", + text: "title", + want: Change{}, + wantErr: true, + }, + { + name: "second line not blank", + text: "title\nbad line\nBody 1", + want: Change{}, + wantErr: true, + }, + { + name: "no footer", + text: "title\n\nBody 1\n", + want: Change{}, + wantErr: true, + }, + { + name: "no footer and no body", + text: "title\n\n\n", + want: Change{}, + wantErr: true, + }, + + // Good examples. + { + name: "good", + text: "title\n\nBody 1\n\nFooter: 1\n", + want: Change{ + Title: "title", + Body: "Body 1", + }, + wantErr: false, + }, + { + name: "good with two body lines", + text: "title\n\nBody 1\nBody 2\n\nFooter: 1\n", + want: Change{ + Title: "title", + Body: "Body 1\nBody 2", + }, + wantErr: false, + }, + { + name: "good with empty body", + text: "title\n\nFooter: 1\n", + want: Change{ + Title: "title", + Body: "", + }, + wantErr: false, + }, + { + name: "good with extra blank lines after footer", + text: "title\n\nBody 1\n\nFooter: 1\n\n\n", + want: Change{ + Title: "title", + Body: "Body 1", + }, + wantErr: false, + }, + { + name: "good with body line that looks like footer", + text: "title\n\nBody 1\nLink: example.com\n\nFooter: 1\n\n\n", + want: Change{ + Title: "title", + Body: "Body 1\nLink: example.com", + }, + wantErr: false, + }, + { + name: "allowed cherry pick in footer", // Example from CL 346093. + text: "title\n\nBody 1\n\nFooter: 1\n(cherry picked from commit ebd07b13caf35114b32e7d6783b27902af4829ce)\n", + want: Change{ + Title: "title", + Body: "Body 1", + }, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ParseCommitMessage(tt.repo, tt.text) + if (err != nil) != tt.wantErr { + t.Errorf("ParseCommitMessage() error = %v, wantErr %v", err, tt.wantErr) + return + } + if diff := cmp.Diff(tt.want, got); diff != "" { + t.Errorf("checkRules() mismatch (-want +got):\n%s", diff) + } + }) + } +} + +// commitMessage helps us create valid commit messages while testing. +func commitMessage(title, body, footers string) string { + return title + "\n\n" + body + "\n\n" + footers +} + +// Some auxiliary testing variables available for use when creating commit messages. +var ( + goodCommitTitle = "pkg: a title that does not trigger any rules" + goodCommitBody = "A commit message body that does not trigger any rules.\n\nFixes #1234" + goodCommitFooters = `Change-Id: I1d8d10b142358983194ef2c389de4d9862d4ce97 +GitHub-Last-Rev: 6d27e1471ee5dac0323a10b46e6e64e647068ecf +GitHub-Pull-Request: golang/build#69` +) diff --git a/cmd/gerritbot/internal/rules/run.go b/cmd/gerritbot/internal/rules/run.go new file mode 100644 index 0000000000..7b759eec90 --- /dev/null +++ b/cmd/gerritbot/internal/rules/run.go @@ -0,0 +1,170 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package rules + +import ( + "bufio" + "fmt" + "regexp" + "strings" + + "golang.org/x/exp/slices" +) + +// Change represents a Gerrit CL and/or GitHub PR that we want to check rules against. +type Change struct { + // Repo is the repository as reported by Gerrit (e.g., "go", "tools", "vscode-go", "website"). + Repo string + // Title is the commit message first line. + Title string + // Body is the commit message body (skipping the title on the first line and the blank second line, + // and without the footers). + Body string + + // TODO: could consider a Footer field, if useful, though I think GerritBot & Gerrit manage those. + // TODO: could be useful in future to have CL and PR fields as well, e.g., if we wanted to + // spot check for something that looks like test files in the changed file list. +} + +func ParseCommitMessage(repo string, text string) (Change, error) { + change := Change{Repo: repo} + lines := splitLines(text) + if len(lines) < 3 { + return Change{}, fmt.Errorf("rules: ParseCommitMessage: short commit message: %q", text) + } + change.Title = lines[0] + if lines[1] != "" { + return Change{}, fmt.Errorf("rules: ParseCommitMessage: second line is not blank in commit message: %q", text) + } + + // Find the body. + // Trim the footers, starting at bottom and stopping at first blank line seen after any footers. + // It is an error to not have any footers (which likely means we are seeing something not managed + // by GerritBot and/or Gerrit). + body := lines[2:] + sawFooter := false + for i := len(body) - 1; i >= 0; i-- { + if match(`^[a-zA-Z][^ ]*: `, body[i]) { + body = body[:i] + sawFooter = true + continue + } + if match(`^\(cherry picked from commit [a-f0-9]+\)$`, body[i]) { + // One CL in our corpus (CL 346093) has this intermixed with the footers. + continue + } + if body[i] == "" { + if !sawFooter { + continue // We leniently skip any blank lines at bottom of commit message. + } + body = body[:i] + break + } + return Change{}, fmt.Errorf("rules: ParseCommitMessage: found non-footer line at end of commit message. line: %q, commit message: %q", body[i], text) + } + if !sawFooter { + return Change{}, fmt.Errorf("rules: ParseCommitMessage: did not find any footers preceded by blank line for commit message: %q", text) + } + change.Body = strings.Join(body, "\n") + + return change, nil +} + +// Result contains the result of a single rule check against a Change. +type Result struct { + Name string + Finding string + Note string +} + +// Check runs the defined rules against one Change. +func Check(change Change) (results []Result) { + for _, group := range ruleGroups { + for _, rule := range group { + if slices.Contains(rule.skip, change.Repo) || len(rule.only) > 0 && !slices.Contains(rule.only, change.Repo) { + continue + } + finding, advice := rule.f(change) + if finding != "" { + results = append(results, Result{ + Name: rule.name, + Finding: finding, + Note: advice, + }) + break // Only report the first finding per rule group. + } + } + } + return results +} + +// FormatResults returns a string ready to be placed in a CL comment, +// formatted as simple markdown. +func FormatResults(results []Result) string { + if len(results) == 0 { + return "" + } + var b strings.Builder + b.WriteString("Possible problems detected:\n") + cnt := 1 + for _, r := range results { + fmt.Fprintf(&b, " %d. %s\n", cnt, r.Finding) + cnt++ + } + advice := formatAdvice(results) + if advice != "" { + b.WriteString("\n" + advice + "\n") + } + return b.String() +} + +// formatAdvice returns a deduplicated string containing all the advice in results. +func formatAdvice(results []Result) string { + var s []string + seen := make(map[string]bool) + for _, r := range results { + if !seen[r.Note] { + s = append(s, r.Note) + } + seen[r.Note] = true + } + return strings.Join(s, " ") +} + +// match reports whether the regexp pattern matches s, +// returning false for a bad regexp after logging the bad regexp. +func match(pattern string, s string) bool { + re := regexp.MustCompile(pattern) + return re.MatchString(s) +} + +// matchAny reports whether the regexp pattern matches any string in list, +// returning false for a bad regexp after logging the bad regexp. +func matchAny(pattern string, list []string) bool { + re := regexp.MustCompile(pattern) + for _, s := range list { + if re.MatchString(s) { + return true + } + } + return false +} + +// matchAny reports the count of matches for the regexp in s, +// returning 0 for a bad regexp after logging the bad regexp. +func matchCount(pattern string, s string) int { + re := regexp.MustCompile(pattern) + return len(re.FindAllString(s, -1)) +} + +// splitLines returns s split into lines, without trailing \n. +func splitLines(s string) []string { + var lines []string + scanner := bufio.NewScanner(strings.NewReader(s)) + for scanner.Scan() { + lines = append(lines, scanner.Text()) + } + return lines +}