From fdf5df99021c2e5844fcab5b78c1423d8c47f1e9 Mon Sep 17 00:00:00 2001 From: thepudds Date: Wed, 26 Jul 2023 12:49:36 -0400 Subject: [PATCH] cmd/gerritbot: automatically flag common issues in PRs In this CL, we add a set of checks that automatically run against a new GitHub PR after it is imported into Gerrit in order to flag common issues, such as missing a bug reference or ending the first line of the commit with a period. See below for the current set of defined rules. The intent is to save maintainer time, as well as perhaps make a better initial experience for a new contributor via faster feedback with potentially helpful linked resources. By attempting to engage a new contributor as soon as possible along with some pointers, we also hope to get them to reply (successfully) in Gerrit, which in turn helps a potential reviewer see that the contributor knows how to use Gerrit. The package comment in rules.go provides an overview of the implementation approach. The rules themselves are defined as short functions in rules.go. We currently have 100% test coverage within the rules package. Sample results from running on a corpus of 1000 GitHub PRs that were imported into Gerrit: Avg. findings per CL: 1.42 CLs with findings: 74.5% CL hit % Finding -------- ------------------------------------------------ 9.9% title: no package found 0.1% title: no colon then single space after package 13.0% title: no lowercase word after a first colon 2.0% title: ends with period 20.8% body: short 6.1% body: no sentence candidates found 21.7% body: long lines 10.5% body: might use markdown 2.2% body: still contains PR instructions 0.9% body: contains Signed-off-by 43.9% body: no bug reference candidate found 9.3% body: bug format looks incorrect 1.5% body: no bug reference candidate at end See golang/go#61573 for additional background. Updates golang/go#61316 Fixes golang/go#61573 Change-Id: I866cf650608d6ce9c6783aabc17a219f1815908c Reviewed-on: https://go-review.googlesource.com/c/build/+/513397 Auto-Submit: Dmitri Shuralyov TryBot-Result: Gopher Robot Reviewed-by: Damien Neil Run-TryBot: t hepudds Reviewed-by: Heschi Kreinick --- cmd/gerritbot/gerritbot.go | 56 +- cmd/gerritbot/internal/rules/rules.go | 446 ++++++++++++++++ cmd/gerritbot/internal/rules/rules_test.go | 589 +++++++++++++++++++++ cmd/gerritbot/internal/rules/run.go | 170 ++++++ 4 files changed, 1259 insertions(+), 2 deletions(-) create mode 100644 cmd/gerritbot/internal/rules/rules.go create mode 100644 cmd/gerritbot/internal/rules/rules_test.go create mode 100644 cmd/gerritbot/internal/rules/run.go diff --git a/cmd/gerritbot/gerritbot.go b/cmd/gerritbot/gerritbot.go index 0953ee8f8d..f3b60f4679 100644 --- a/cmd/gerritbot/gerritbot.go +++ b/cmd/gerritbot/gerritbot.go @@ -28,6 +28,7 @@ import ( "cloud.google.com/go/compute/metadata" "github.com/google/go-github/v48/github" "github.com/gregjones/httpcache" + "golang.org/x/build/cmd/gerritbot/internal/rules" "golang.org/x/build/gerrit" "golang.org/x/build/internal/https" "golang.org/x/build/internal/secret" @@ -729,7 +730,8 @@ func (b *bot) importGerritChangeFromPR(ctx context.Context, pr *github.PullReque pushOpts = "%ready" } - if cl == nil { + newCL := cl == nil + if newCL { // Add this informational message only on CL creation. msg := fmt.Sprintf("This Gerrit CL corresponds to GitHub PR %s.\n\nAuthor: %s", prShortLink(pr), author) pushOpts += ",m=" + url.QueryEscape(msg) @@ -766,7 +768,57 @@ Please visit Gerrit at %s. * You should word wrap the PR description at ~76 characters unless you need longer lines (e.g., for tables or URLs). * See the [Sending a change via GitHub](https://go.dev/doc/contribute#sending_a_change_github) and [Reviews](https://go.dev/doc/contribute#reviews) sections of the Contribution Guide as well as the [FAQ](https://github.com/golang/go/wiki/GerritBot/#frequently-asked-questions) for details.`, pr.Head.GetSHA(), changeURL) - return b.postGitHubMessageNoDup(ctx, repo.GetOwner().GetLogin(), repo.GetName(), pr.GetNumber(), "", msg) + err = b.postGitHubMessageNoDup(ctx, repo.GetOwner().GetLogin(), repo.GetName(), pr.GetNumber(), "", msg) + if err != nil { + return err + } + + if newCL { + // Check if we spot any problems with the CL according to our internal + // set of rules, and if so, add an unresolved comment to Gerrit. + // If the author responds to this, it also helps a reviewer see the author has + // registered for a Gerrit account and knows how to reply in Gerrit. + // TODO: see CL 509135 for possible follow-ups, including possibly always + // asking explicitly if the CL is ready for review even if there are no problems, + // and possibly reminder comments followed by ultimately automatically + // abandoning the CL if the author never replies. + change, err := rules.ParseCommitMessage(repo.GetName(), cmsg) + if err != nil { + return fmt.Errorf("failed to parse commit message for %s: %v", prShortLink(pr), err) + } + problems := rules.Check(change) + if len(problems) > 0 { + summary := rules.FormatResults(problems) + // If needed, summary contains advice for how to edit the commit message. + msg := fmt.Sprintf("Hello %v, I've spotted some possible problems.\n\n"+ + "These findings are based on simple heuristics. If a finding appears wrong, briefly reply here saying so. "+ + "Otherwise, please address any problems and update the GitHub PR. "+ + "When complete, mark this comment as 'Done' and click the [blue 'Reply' button](https://github.com/golang/go/wiki/GerritBot#i-left-a-reply-to-a-comment-in-gerrit-but-no-one-but-me-can-see-it) above.\n\n"+ + "%s\n\n"+ + "(In general for Gerrit code reviews, the CL author is expected to close out each piece of feedback by "+ + "marking it as 'Done' if implemented as suggested or otherwise reply to each review comment. "+ + "See the [Review](https://go.dev/doc/contribute#review) section of the Contributing Guide for details.)", + author, summary) + + gcl, err := b.gerritChangeForPR(pr) + if err != nil { + return fmt.Errorf("could not look up CL after creation for %s: %v", prShortLink(pr), err) + } + unresolved := true + ri := gerrit.ReviewInput{ + Comments: map[string][]gerrit.CommentInput{ + "/PATCHSET_LEVEL": {{Message: msg, Unresolved: &unresolved}}, + }, + } + // TODO: instead of gcl.ID, we might need to do something similar to changeTriple in cmd/coordinator. + err = b.gerritClient.SetReview(ctx, gcl.ChangeID, gcl.ID, ri) + if err != nil { + return fmt.Errorf("could not add findings comment to CL for %s: %v", prShortLink(pr), err) + } + } + } + + return nil } var changeIdentRE = regexp.MustCompile(`(?m)^Change-Id: (I[0-9a-fA-F]{40})\n?`) diff --git a/cmd/gerritbot/internal/rules/rules.go b/cmd/gerritbot/internal/rules/rules.go new file mode 100644 index 0000000000..00a0d007ee --- /dev/null +++ b/cmd/gerritbot/internal/rules/rules.go @@ -0,0 +1,446 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package rules specifies a simple set of rules for checking GitHub PRs or Gerrit CLs +// for certain common mistakes, like no package in the first line of the commit message +// or having long lines in the commit message body. +// +// A rule is primarily defined via a function that takes a Change (CL or PR) as input +// and reports zero or 1 findings, which is just a string (usually 1-2 short sentences). +// A rule can also optionally return a note, which might be auxiliary advice such as +// how to edit the commit message. +// +// The FormatResults function renders the results into markdown. +// Each finding is shown to the user, currently in a numbered list of findings. +// The auxiliary notes are deduplicated, so that for example a set of rules +// looking for problems in a commit message can all return the same editing advice +// but that advice is then only shown to the user only once. +// +// For example, for a commit message with a first line of: +// +// fmt: Improve foo and bar. +// +// That would currently trigger two rules, with an example formatted result of +// the following (including the deduplicated advice at the end): +// +// Possible problems detected: +// 1. The first word in the commit title after the package should be a +// lowercase English word (usually a verb). +// 2. The commit title should not end with a period. +// +// To edit the commit message, see instructions [here](...). For guidance on commit +// messages for the Go project, see [here](...). +// +// Rules currently err on the side of simplicity and avoiding false positives. +// It is is intended to be straightforward to add a new rule. +// +// Rules can be arranged to trigger completely independently of one another, +// or alternatively a set of rules can optionally be arranged in groups that +// form a precedence based on order within the group, where at most one rule +// from that group will trigger. This can be helpful in cases like: +// - You'd like to have a "catch all" rule that should only trigger if another rule +// in the same group hasn't triggered. +// - You have a rule that spots a more general problem and rules that spot +// more specific or pickier variant of the same problem and want to have +// the general problem reported if applicable without also reporting +// pickier findings that would seem redundant. +// - A subset of rules that are otherwise intended to be mutually exclusive. +package rules + +import ( + "fmt" + "strings" +) + +// rule defines a single rule that can report a finding. +// See the package comment for an overview. +type rule struct { + // name is a short internal rule name that don't expect to tweak frequently. + // We don't show it to users, but do use in tests. + name string + + // f is the rule function that reports a single finding and optionally + // an auxiliary note, which for example could be advice on how to edit the commit message. + // Notes are deduplicated across different rules, and ignored for a given rule if there is no finding. + f func(change Change) (finding string, note string) + + // skip lists repos to skip for this rule. + skip []string + + // only lists the only repos that this rule will run against. + only []string +} + +// ruleGroups defines our live set of rules. It is a [][]rule +// because the individual rules are arranged into groups of rules +// that are mutually exclusive, where the first triggering rule wins +// within a []rule in ruleGroups. See the package comment for details. +var ruleGroups = [][]rule{ + { + // We have two rules in this rule group, and hence we report at most one of them. + // The second (pickier) rule is checked only if the first doesn't trigger. + { + name: "title: no package found", + skip: []string{"proposal"}, // We allow the proposal repo to have an irregular title. + f: func(change Change) (finding string, note string) { + component, example := packageExample(change.Repo) + finding = fmt.Sprintf("The commit title should start with the primary affected %s name followed by a colon, like \"%s\".", component, example) + start, _, ok := strings.Cut(change.Title, ":") + if !ok { + // No colon. + return finding, commitMessageAdvice + } + var pattern string + switch change.Repo { + default: + // A single package starts with a lowercase ASCII and has no spaces prior to the colon. + pattern = `^[a-z][^ ]+$` + case "website": + // Allow leading underscore (e.g., "_content"). + pattern = `^[a-z_][^ ]+$` + case "vscode-go": + // Allow leading uppercase and period (e.g., ".github/workflow"). + pattern = `^[a-zA-Z.][^ ]+$` + } + if match(pattern, start) { + return "", "" + } + if match(`^(README|DEBUG)`, start) { + // Rare, but also allow a root README or DEBUG file as a component. + return "", "" + } + pkgs := strings.Split(start, ", ") + if match(`^[a-z]`, start) && len(pkgs) > 1 && !matchAny(` `, pkgs) { + // We also allow things like "go/types, types2: ..." (but not "hello, fun world: ..."). + return "", "" + } + return finding, commitMessageAdvice + }, + }, + { + name: "title: no colon then single space after package", + skip: []string{"proposal"}, // We allow the proposal repo to have an irregular title. + f: func(change Change) (finding string, note string) { + component, example := packageExample(change.Repo) + finding = fmt.Sprintf("The %s in the commit title should be followed by a colon and single space, like \"%s\".", component, example) + if !match(`[^ ]: [^ ]`, change.Title) { + return finding, commitMessageAdvice + } + return "", "" + }, + }, + }, + { + { + name: "title: no lowercase word after a first colon", + skip: []string{"proposal"}, // We allow the proposal repo to have an irregular title. + f: func(change Change) (finding string, note string) { + component, _ := packageExample(change.Repo) + finding = fmt.Sprintf("The first word in the commit title after the %s should be a lowercase English word (usually a verb).", component) + _, after, ok := strings.Cut(change.Title, ":") + if !ok { + // No colon. Someone who doesn't have any colon probably can use a reminder about what comes next. + return finding, commitMessageAdvice + } + if !match(`^[ ]*[a-z]+\b`, after) { + // Probably not a lowercase English verb. + return finding, commitMessageAdvice + } + return "", "" + }, + }, + }, + { + { + name: "title: ends with period", + f: func(change Change) (finding string, note string) { + finding = "The commit title should not end with a period." + if len(change.Title) > 0 && change.Title[len(change.Title)-1] == '.' { + return finding, commitMessageAdvice + } + return "", "" + }, + }, + }, + { + // We have two rules in this group, and hence we report at most one of them. + // The second rule is pickier. + { + name: "body: short", + f: func(change Change) (finding string, note string) { + finding = "The commit message body is %s. " + + "That can be OK if the change is trivial like correcting spelling or fixing a broken link, " + + "but usually the description should provide context for the change and explain what it does in complete sentences." + if !mightBeTrivial(change) { + size := len([]rune(change.Body)) + switch { + case size == 0: + return fmt.Sprintf(finding, "missing"), commitMessageAdvice + case size < 24: // len("Updates golang/go#12345") == 23 + return fmt.Sprintf(finding, "very brief"), commitMessageAdvice + } + } + return "", "" + }, + }, + { + name: "body: no sentence candidates found", + f: func(change Change) (finding string, note string) { + finding = "Are you describing the change in complete sentences with correct punctuation in the commit message body, including ending sentences with periods?" + if !mightBeTrivial(change) && !match("[a-zA-Z0-9\"'`)][.:?!)]( |\\n|$)", change.Body) { + // A complete English sentence usually ends with an alphanumeric immediately followed by a terminating punctuation. + // (This is an approximate test, but currently has a very low false positive rate. Brief manual checking suggests + // ~zero false positives in a corpus of 1000 CLs). + return finding, commitMessageAdvice + } + return "", "" + }, + }, + }, + { + { + name: "body: long lines", + f: func(change Change) (finding string, note string) { + finding = "Lines in the commit message should be wrapped at ~76 characters unless needed for things like URLs or tables. You have a %d character line." + // Check if something smells like a table, ASCII art, or benchstat output. + if match("----|____|[\u2500-\u257F]", change.Body) || match(` ± [0-9.]+%`, change.Body) { + // This might be something that is allowed to be wide. + // (The Unicode character range above covers box drawing characters, which is probably overkill). + // We are lenient here and don't check the rest of the body, + // mainly to be friendly and also we don't want to guess + // line by line whether something looks like a table. + return "", "" + } + longest := 0 + for _, line := range splitLines(change.Body) { + if match(`https?://|www\.|\.(com|org|dev)`, line) || matchCount(`[/\\]`, line) > 4 { + // Might be a long url or other path. + continue + } + l := len([]rune(line)) + if longest < l { + longest = l + } + } + if longest > 78 { // Official guidance on wiki is "~76". + return fmt.Sprintf(finding, longest), commitMessageAdvice + } + return "", "" + }, + }, + }, + { + { + name: "body: might use markdown", + f: func(change Change) (finding string, note string) { + finding = "Are you using markdown? Markdown should not be used to augment text in the commit message." + if match(`(?i)markdown`, change.Title) || match(`(?i)markdown`, change.Body) { + // Could be a markdown-related fix. + return "", "" + } + if matchCount("(?m)^```", change.Body) >= 2 { + // Looks like a markdown code block. + return finding, commitMessageAdvice + } + // Now look for markdown backticks (which might be most common offender), + // but with a mild attempt to avoid flagging a Go regexp or other Go raw string. + if !match(`regex|string`, change.Title) { + for _, line := range splitLines(change.Body) { + if strings.ContainsAny(line, "={}()[]") || match(`regex|string`, line) { + // This line might contain Go code. This is fairly lenient, but + // in practice, if someone uses markdown once in a CL, they tend to use + // it multiple times, so we often have multiple chances to find a hit. + continue + } + if match("`.+`", line) { + // Could be a markdown backtick. + return finding, commitMessageAdvice + } + } + } + if match(`\[.+]\(https?://.+\)`, change.Body) { + // Looks like a markdown link. (Sorry, currently our ugliest regexp). + return finding, commitMessageAdvice + } + return "", "" + }, + }, + }, + { + { + name: "body: still contains PR instructions", + f: func(change Change) (finding string, note string) { + finding = "Do you still have the GitHub PR instructions in your commit message text? The PR instructions should be deleted once you have applied them." + if strings.Contains(change.Body, "Delete these instructions once you have read and applied them") { + return finding, commitMessageAdvice + } + return "", "" + }, + }, + }, + { + { + name: "body: contains Signed-off-by", + f: func(change Change) (finding string, note string) { + finding = "Please do not use 'Signed-off-by'. We instead rely on contributors signing CLAs." + if match(`(?mi)^Signed-off-by: `, change.Body) { + return finding, commitMessageAdvice + } + return "", "" + }, + }, + }, + { + // We have three rules in this group, and hence we report at most one of them. + // The three rules get progressively pickier. + // We exempt the proposal repo from these rules because almost all GitHub PRs + // for the proposal repo are for typos or similar. + { + name: "body: no bug reference candidate found", + skip: []string{"proposal"}, + f: func(change Change) (finding string, note string) { + finding = "You usually need to reference a bug number for all but trivial or cosmetic fixes. " + + "%s at the end of the commit message. Should you have a bug reference?" + if mightBeTrivial(change) { + return "", "" + } + var bugPattern string + switch usesTracker(change.Repo) { + case mainTracker: + bugPattern = `#\d{4}` + default: + bugPattern = `#\d{2}` + } + if !match(bugPattern, change.Body) { + return fmt.Sprintf(finding, bugExamples(change.Repo)), commitMessageAdvice + } + return "", "" + }, + }, + { + name: "body: bug format looks incorrect", + skip: []string{"proposal"}, + f: func(change Change) (finding string, note string) { + finding = "Do you have the right bug reference format? %s (without a period) at the end of the commit message." + if mightBeTrivial(change) { + return "", "" + } + var bugPattern string + switch { + case change.Repo == "go": + bugPattern = `#\d{4,}$` + case usesTracker(change.Repo) == mainTracker: + bugPattern = `golang/go#\d{4,}$` + case usesTracker(change.Repo) == ownTracker: + bugPattern = fmt.Sprintf(`golang/%s#\d{2,}$`, change.Repo) + default: + bugPattern = `golang/go#\d{4,}$` + } + if !match(`(?m)^(Fixes|Updates|For|Closes|Resolves) `+bugPattern, change.Body) { + return fmt.Sprintf(finding, bugExamples(change.Repo)), commitMessageAdvice + } + return "", "" + }, + }, + { + name: "body: no bug reference candidate at end", + skip: []string{"proposal"}, + f: func(change Change) (finding string, note string) { + // If this rule is running, it means it passed the earlier bug-related rules + // in this group, so we know there is what looks like a well-formed bug. + finding = "It looks like you have a properly formated bug reference, but the convention is to " + + "put bug references at the bottom of the commit message, even if a bug is also mentioned " + + "in the body of the message." + if mightBeTrivial(change) { + return "", "" + } + var bugPattern string + switch { + case usesTracker(change.Repo) == mainTracker: + bugPattern = `#\d{4}` + default: + bugPattern = `#\d{2}` + } + lines := splitLines(change.Body) + if len(lines) > 0 && !match(bugPattern, lines[len(lines)-1]) { + return finding, commitMessageAdvice + } + return "", "" + }, + }, + }, +} + +// Auxiliary advice. +var commitMessageAdvice = "To edit the commit message, see instructions [here](https://github.com/golang/go/wiki/GerritBot/#how-does-gerritbot-determine-the-final-commit-message). " + + "For guidelines on commit messages for the Go project, see [here](https://go.dev/doc/contribute#commit_messages)." + +// mightBeTrivial reports if a change looks like something +// where a commit body is often allowed to be skipped, such +// as correcting spelling, fixing a broken link, or +// adding or correcting an example. +func mightBeTrivial(change Change) bool { + return match(`(?i)typo|spell|grammar|grammatical|comment|readme|document|\bdocs?\b|example|(fix|correct|broken|wrong).*(link|url)`, change.Title) +} + +// bugExamples returns a snippet of text that includes example bug references +// formatted as expected for a repo. +func bugExamples(repo string) string { + switch { + case repo == "go": + return "For this repo, the format is usually 'Fixes #12345' or 'Updates #12345'" + case usesTracker(repo) == mainTracker: + return fmt.Sprintf("For the %s repo, the format is usually 'Fixes golang/go#12345' or 'Updates golang/go#12345'", repo) + case usesTracker(repo) == ownTracker: + return fmt.Sprintf("For the %s repo, the format is usually 'Fixes golang/%s#1234' or 'Updates golang/%s#1234'", repo, repo, repo) + default: + // We don't know how issues should be referenced for this repo, including this might be + // some future created after these rules were last updated, so be a bit vaguer. + return "For most repos outside the main go repo, the format is usually 'Fixes #12345' or 'Updates #12345'" + } +} + +// packageExample returns an example usage of a package/component in a commit title +// for a given repo, along with what to call the leading words before the first +// colon ("package" vs. "component"). +func packageExample(repo string) (component string, example string) { + switch repo { + default: + return "package", "net/http: improve [...]" + case "website": + return "component", "_content/doc/go1.21: fix [...]" + case "vscode-go": + return "component", "src/goInstallTools: improve [...]" + } +} + +// tracker is the issue tracker used by a repo. +type tracker int + +const ( + mainTracker tracker = iota // uses the main golang/go issue tracker. + ownTracker // uses a repo-specific tracker, like golang/vscode-go. + unknownTracker // we don't know what tracker is used, which usually means the main tracker is used. +) + +// usesTracker reports if a repo uses the main golang/go tracker or a repo-specific tracker. +// Callers should deal gracefully with unknownTracker being reported (such as by giving less precise +// advice that is still generally useful), which might happen with a less commonly used repo or some future repo. +func usesTracker(repo string) tracker { + // It's OK for a repo to be missing from our list. If something is missing, either it is not frequently used + // and hence hopefully not a major problem, or in the rare case we are missing something frequently used, + // this can be updated if someone complains. + // TODO: not immediately clear where vulndb should go. In practice, it seems to use the main golang/go tracker, + // but it also has its own tracker (which might just be for security reports?). Leaving unspecified for now, + // which results in vaguer advice. + switch repo { + case "go", "arch", "build", "crypto", "debug", "exp", "image", "mobile", "mod", "net", "perf", "pkgsite", "playground", + "proposal", "review", "sync", "sys", "telemetry", "term", "text", "time", "tools", "tour", "vuln", "website", "xerrors": + return mainTracker + case "vscode-go": + return ownTracker + default: + return unknownTracker + } +} diff --git a/cmd/gerritbot/internal/rules/rules_test.go b/cmd/gerritbot/internal/rules/rules_test.go new file mode 100644 index 0000000000..1266acc8bd --- /dev/null +++ b/cmd/gerritbot/internal/rules/rules_test.go @@ -0,0 +1,589 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package rules + +import ( + "strings" + "testing" + + "github.com/google/go-cmp/cmp" +) + +func TestFormatRuleResults(t *testing.T) { + tests := []struct { + title string + repo string // if empty string, treated as "go" repo + body string + want string + }{ + { + title: `fmt: improve some things`, // We consider this a good commit message title. + body: goodCommitBody, + want: ``, + }, + { + title: `a bad commit message title.`, + body: goodCommitBody, + want: `Possible problems detected: + 1. The commit title should start with the primary affected package name followed by a colon, like "net/http: improve [...]". + 2. The first word in the commit title after the package should be a lowercase English word (usually a verb). + 3. The commit title should not end with a period. + +To edit the commit message, see instructions [here](https://github.com/golang/go/wiki/GerritBot/#how-does-gerritbot-determine-the-final-commit-message). For guidelines on commit messages for the Go project, see [here](https://go.dev/doc/contribute#commit_messages). +`, + }, + { + title: `A bad vscode-go commit title`, // This verifies we complain about a "component" rather than "package". + repo: "vscode-go", + body: "This includes a bad bug format for vscode-go repo.\nFixes #1234", + want: `Possible problems detected: + 1. The commit title should start with the primary affected component name followed by a colon, like "src/goInstallTools: improve [...]". + 2. The first word in the commit title after the component should be a lowercase English word (usually a verb). + 3. Do you have the right bug reference format? For the vscode-go repo, the format is usually 'Fixes golang/vscode-go#1234' or 'Updates golang/vscode-go#1234' (without a period) at the end of the commit message. + +To edit the commit message, see instructions [here](https://github.com/golang/go/wiki/GerritBot/#how-does-gerritbot-determine-the-final-commit-message). For guidelines on commit messages for the Go project, see [here](https://go.dev/doc/contribute#commit_messages). +`, + }, + { + title: goodCommitTitle, + body: "This commit body is missing a bug reference.", + want: `Possible problems detected: + 1. You usually need to reference a bug number for all but trivial or cosmetic fixes. For this repo, the format is usually 'Fixes #12345' or 'Updates #12345' at the end of the commit message. Should you have a bug reference? + +To edit the commit message, see instructions [here](https://github.com/golang/go/wiki/GerritBot/#how-does-gerritbot-determine-the-final-commit-message). For guidelines on commit messages for the Go project, see [here](https://go.dev/doc/contribute#commit_messages). +`, + }, + } + for _, tt := range tests { + t.Run("title "+tt.title, func(t *testing.T) { + commit := commitMessage(tt.title, tt.body, goodCommitFooters) + repo := "go" + if tt.repo != "" { + repo = tt.repo + } + change, err := ParseCommitMessage(repo, commit) + if err != nil { + t.Fatalf("ParseCommitMessage failed: %v", err) + } + results := Check(change) + got := FormatResults(results) + t.Log("FormatResults:\n" + got) + if diff := cmp.Diff(tt.want, got); diff != "" { + t.Errorf("checkRules() mismatch (-want +got):\n%s", diff) + } + }) + } +} + +func TestTitleRules(t *testing.T) { + tests := []struct { + title string + repo string // if empty string, treated as "go" repo + body string + want []string + }{ + // We consider these good titles. + { + title: "fmt: good", + want: nil, + }, + { + title: "go/types, types2: good", + want: nil, + }, + { + title: "go/types, types2, types3: good", + want: nil, + }, + { + title: "fmt: improve & make: Good", + want: nil, + }, + { + title: "README: fix something", + want: nil, + }, + { + title: "_content/doc/go1.21: fix something", + repo: "website", + want: nil, + }, + { + title: "Fix a proposal", // We are lenient with proposal repo titles. + repo: "proposal", + want: nil, + }, + + // We consider these bad titles. + { + title: "bad.", + want: []string{ + "title: no package found", + "title: no lowercase word after a first colon", + "title: ends with period", + }, + }, + { + title: "bad", + want: []string{ + "title: no package found", + "title: no lowercase word after a first colon", + }, + }, + { + title: "fmt: bad.", + want: []string{ + "title: ends with period", + }, + }, + { + title: "Fmt: bad", + want: []string{ + "title: no package found", + }, + }, + { + title: "fmt: Bad", + want: []string{ + "title: no lowercase word after a first colon", + }, + }, + { + title: "fmt: bad", + want: []string{ + "title: no colon then single space after package", + }, + }, + { + title: "fmt: Bad", + want: []string{ + "title: no colon then single space after package", + "title: no lowercase word after a first colon", + }, + }, + { + title: "fmt:bad", + want: []string{ + "title: no colon then single space after package", + }, + }, + { + title: "fmt : bad", + want: []string{ + "title: no package found", + }, + }, + { + title: ": bad", + want: []string{ + "title: no package found", + }, + }, + { + title: " : bad", + want: []string{ + "title: no package found", + }, + }, + { + title: "go/types types2: bad", + want: []string{ + "title: no package found", + }, + }, + { + title: "a sentence, with a comma and colon: bad", + want: []string{ + "title: no package found", + }, + }, + { + title: "a sentence with a colon: and a wrongly placed package fmt: bad", + want: []string{ + "title: no package found", + }, + }, + { + title: "", + want: []string{ + "title: no package found", + "title: no lowercase word after a first colon", + }, + }, + + // We allow these titles (in interests of simplicity or leniency). + // TODO: are some of these considered an alternative good style? + { + title: "go/types,types2: we allow", + want: nil, + }, + { + title: "cmd/{compile,link}: we allow", + want: nil, + }, + { + title: "cmd/{compile, link}: we allow", + want: nil, + }, + } + for _, tt := range tests { + t.Run("title "+tt.title, func(t *testing.T) { + commit := commitMessage(tt.title, goodCommitBody, goodCommitFooters) + repo := "go" + if tt.repo != "" { + repo = tt.repo + } + change, err := ParseCommitMessage(repo, commit) + if err != nil { + t.Fatalf("ParseCommitMessage failed: %v", err) + } + results := Check(change) + + var got []string + for _, r := range results { + got = append(got, r.Name) + } + + if diff := cmp.Diff(tt.want, got); diff != "" { + t.Errorf("checkRules() mismatch (-want +got):\n%s", diff) + } + }) + } +} + +func TestBodyRules(t *testing.T) { + tests := []struct { + name string + title string // if empty string, we use goodCommitTitle + repo string // if empty string, treated as "go" repo + body string + want []string + }{ + // We consider these good bodies. + { + name: "good", + body: goodCommitBody, + want: nil, + }, + { + name: "good bug format for go repo", + repo: "go", + body: "This is This is body text.\n\nFixes #1234", + want: nil, + }, + { + name: "good bug format for tools repo", + repo: "tools", + body: "This is This is body text.\n\nFixes golang/go#1234", + want: nil, + }, + { + name: "good bug format for vscode-go", + repo: "vscode-go", + body: "This is This is body text.\n\nFixes golang/vscode-go#1234", + want: nil, + }, + { + name: "good bug format for unknown repo", + repo: "some-future-repo", + body: "This is This is body text.\n\nFixes golang/go#1234", + want: nil, + }, + { + name: "allowed long lines for benchstat output", + body: "Encode/format=json-48 1.718µ ± 1% 1.423µ ± 1% -17.20% (p=0.000 n=10)" + + strings.Repeat("Hello. ", 100) + "\n" + goodCommitBody, + want: nil, + }, + { + name: "a trival fix", + title: "fmt: fix spelling mistakes", + body: "", + want: nil, // we don't flag short body or missing bug reference because "spelling" is in title + }, + + // Now we consider some bad bodies. + // First, some basic mistakes. + { + name: "too short", + body: "Short body", + want: []string{ + "body: short", + "body: no bug reference candidate found", + }, + }, + { + name: "missing body", + body: "", + want: []string{ + "body: short", + "body: no bug reference candidate found", + }, + }, + { + name: "not word wrapped", + body: strings.Repeat("Hello. ", 100) + "\n" + goodCommitBody, + want: []string{ + "body: long lines", + }, + }, + { + name: "not a sentence", + body: "This is missing a period", + want: []string{ + "body: no sentence candidates found", + "body: no bug reference candidate found", + }, + }, + { + name: "Signed-off-by", + body: "Signed-off-by: bad\n\n" + goodCommitBody, + want: []string{ + "body: contains Signed-off-by", + }, + }, + { + name: "PR instructions", + body: "Delete these instructions once you have read and applied them.\n", + want: []string{ + "body: still contains PR instructions", + "body: no bug reference candidate found", + }, + }, + + // Next, mistakes in the repo-specific format or location of bug references. + { + name: "bad bug format for go repo", + repo: "go", + body: "This is body text.\n\nFixes golang/go#1234", + want: []string{ + "body: bug format looks incorrect", + }, + }, + { + name: "bad bug format for tools repo", + repo: "tools", + body: "This is body text.\n\nFixes #1234", + want: []string{ + "body: bug format looks incorrect", + }, + }, + { + name: "bad bug format for vscode-go", + repo: "vscode-go", + body: "This is body text.\n\nFixes #1234", + want: []string{ + "body: bug format looks incorrect", + }, + }, + { + name: "bad bug format for unknown repo", + repo: "some-future-repo", + body: "This is body text.\n\nFixes #1234", + want: []string{ + "body: bug format looks incorrect", + }, + }, + { + name: "bad bug location", + body: "This is body text.\nFixes #1234\nAnd a final line we should not have.", + want: []string{ + "body: no bug reference candidate at end", + }, + }, + + // We next have some good bodies that are markdown-ish, + // but we allow them. + { + name: "allowed markdown: mention regex in title", + title: "regexp: fix something", + body: "Example `.*`.\n" + goodCommitBody, + want: nil, + }, + { + name: "allowed markdown: mention regex in body", + body: "A regex `.*`.\n" + goodCommitBody, + want: nil, + }, + { + name: "allowed markdown: mention markdown", + body: "A markdown bug `and this is ok`.\n" + goodCommitBody, + want: nil, + }, + { + name: "allowed markdown: might be go code", + body: " s := `raw`\n" + goodCommitBody, + want: nil, + }, + + // Examples of using markdown that we flag. + { + name: "markdown backticks", + body: "A variable `foo`.\n" + goodCommitBody, + want: []string{ + "body: might use markdown", + }, + }, + { + name: "markdown block quote", + body: "Some code:\n```\nx := y\n```\n" + goodCommitBody, + want: []string{ + "body: might use markdown", + }, + }, + { + name: "markdown link", + body: "[click here](https://example.com)\n" + goodCommitBody, + want: []string{ + "body: might use markdown", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + title := goodCommitTitle + if tt.title != "" { + title = tt.title + } + commit := commitMessage(title, tt.body, goodCommitFooters) + repo := "go" + if tt.repo != "" { + repo = tt.repo + } + change, err := ParseCommitMessage(repo, commit) + if err != nil { + t.Fatalf("ParseCommitMessage failed: %v", err) + } + results := Check(change) + + var got []string + for _, r := range results { + got = append(got, r.Name) + } + + if diff := cmp.Diff(tt.want, got); diff != "" { + t.Errorf("checkRules() mismatch (-want +got):\n%s", diff) + } + }) + } +} + +func TestParseCommitMessage(t *testing.T) { + tests := []struct { + name string + repo string + text string + want Change + wantErr bool + }{ + // Bad examples. + { + name: "not enough lines", + text: "title", + want: Change{}, + wantErr: true, + }, + { + name: "second line not blank", + text: "title\nbad line\nBody 1", + want: Change{}, + wantErr: true, + }, + { + name: "no footer", + text: "title\n\nBody 1\n", + want: Change{}, + wantErr: true, + }, + { + name: "no footer and no body", + text: "title\n\n\n", + want: Change{}, + wantErr: true, + }, + + // Good examples. + { + name: "good", + text: "title\n\nBody 1\n\nFooter: 1\n", + want: Change{ + Title: "title", + Body: "Body 1", + }, + wantErr: false, + }, + { + name: "good with two body lines", + text: "title\n\nBody 1\nBody 2\n\nFooter: 1\n", + want: Change{ + Title: "title", + Body: "Body 1\nBody 2", + }, + wantErr: false, + }, + { + name: "good with empty body", + text: "title\n\nFooter: 1\n", + want: Change{ + Title: "title", + Body: "", + }, + wantErr: false, + }, + { + name: "good with extra blank lines after footer", + text: "title\n\nBody 1\n\nFooter: 1\n\n\n", + want: Change{ + Title: "title", + Body: "Body 1", + }, + wantErr: false, + }, + { + name: "good with body line that looks like footer", + text: "title\n\nBody 1\nLink: example.com\n\nFooter: 1\n\n\n", + want: Change{ + Title: "title", + Body: "Body 1\nLink: example.com", + }, + wantErr: false, + }, + { + name: "allowed cherry pick in footer", // Example from CL 346093. + text: "title\n\nBody 1\n\nFooter: 1\n(cherry picked from commit ebd07b13caf35114b32e7d6783b27902af4829ce)\n", + want: Change{ + Title: "title", + Body: "Body 1", + }, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ParseCommitMessage(tt.repo, tt.text) + if (err != nil) != tt.wantErr { + t.Errorf("ParseCommitMessage() error = %v, wantErr %v", err, tt.wantErr) + return + } + if diff := cmp.Diff(tt.want, got); diff != "" { + t.Errorf("checkRules() mismatch (-want +got):\n%s", diff) + } + }) + } +} + +// commitMessage helps us create valid commit messages while testing. +func commitMessage(title, body, footers string) string { + return title + "\n\n" + body + "\n\n" + footers +} + +// Some auxiliary testing variables available for use when creating commit messages. +var ( + goodCommitTitle = "pkg: a title that does not trigger any rules" + goodCommitBody = "A commit message body that does not trigger any rules.\n\nFixes #1234" + goodCommitFooters = `Change-Id: I1d8d10b142358983194ef2c389de4d9862d4ce97 +GitHub-Last-Rev: 6d27e1471ee5dac0323a10b46e6e64e647068ecf +GitHub-Pull-Request: golang/build#69` +) diff --git a/cmd/gerritbot/internal/rules/run.go b/cmd/gerritbot/internal/rules/run.go new file mode 100644 index 0000000000..7b759eec90 --- /dev/null +++ b/cmd/gerritbot/internal/rules/run.go @@ -0,0 +1,170 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package rules + +import ( + "bufio" + "fmt" + "regexp" + "strings" + + "golang.org/x/exp/slices" +) + +// Change represents a Gerrit CL and/or GitHub PR that we want to check rules against. +type Change struct { + // Repo is the repository as reported by Gerrit (e.g., "go", "tools", "vscode-go", "website"). + Repo string + // Title is the commit message first line. + Title string + // Body is the commit message body (skipping the title on the first line and the blank second line, + // and without the footers). + Body string + + // TODO: could consider a Footer field, if useful, though I think GerritBot & Gerrit manage those. + // TODO: could be useful in future to have CL and PR fields as well, e.g., if we wanted to + // spot check for something that looks like test files in the changed file list. +} + +func ParseCommitMessage(repo string, text string) (Change, error) { + change := Change{Repo: repo} + lines := splitLines(text) + if len(lines) < 3 { + return Change{}, fmt.Errorf("rules: ParseCommitMessage: short commit message: %q", text) + } + change.Title = lines[0] + if lines[1] != "" { + return Change{}, fmt.Errorf("rules: ParseCommitMessage: second line is not blank in commit message: %q", text) + } + + // Find the body. + // Trim the footers, starting at bottom and stopping at first blank line seen after any footers. + // It is an error to not have any footers (which likely means we are seeing something not managed + // by GerritBot and/or Gerrit). + body := lines[2:] + sawFooter := false + for i := len(body) - 1; i >= 0; i-- { + if match(`^[a-zA-Z][^ ]*: `, body[i]) { + body = body[:i] + sawFooter = true + continue + } + if match(`^\(cherry picked from commit [a-f0-9]+\)$`, body[i]) { + // One CL in our corpus (CL 346093) has this intermixed with the footers. + continue + } + if body[i] == "" { + if !sawFooter { + continue // We leniently skip any blank lines at bottom of commit message. + } + body = body[:i] + break + } + return Change{}, fmt.Errorf("rules: ParseCommitMessage: found non-footer line at end of commit message. line: %q, commit message: %q", body[i], text) + } + if !sawFooter { + return Change{}, fmt.Errorf("rules: ParseCommitMessage: did not find any footers preceded by blank line for commit message: %q", text) + } + change.Body = strings.Join(body, "\n") + + return change, nil +} + +// Result contains the result of a single rule check against a Change. +type Result struct { + Name string + Finding string + Note string +} + +// Check runs the defined rules against one Change. +func Check(change Change) (results []Result) { + for _, group := range ruleGroups { + for _, rule := range group { + if slices.Contains(rule.skip, change.Repo) || len(rule.only) > 0 && !slices.Contains(rule.only, change.Repo) { + continue + } + finding, advice := rule.f(change) + if finding != "" { + results = append(results, Result{ + Name: rule.name, + Finding: finding, + Note: advice, + }) + break // Only report the first finding per rule group. + } + } + } + return results +} + +// FormatResults returns a string ready to be placed in a CL comment, +// formatted as simple markdown. +func FormatResults(results []Result) string { + if len(results) == 0 { + return "" + } + var b strings.Builder + b.WriteString("Possible problems detected:\n") + cnt := 1 + for _, r := range results { + fmt.Fprintf(&b, " %d. %s\n", cnt, r.Finding) + cnt++ + } + advice := formatAdvice(results) + if advice != "" { + b.WriteString("\n" + advice + "\n") + } + return b.String() +} + +// formatAdvice returns a deduplicated string containing all the advice in results. +func formatAdvice(results []Result) string { + var s []string + seen := make(map[string]bool) + for _, r := range results { + if !seen[r.Note] { + s = append(s, r.Note) + } + seen[r.Note] = true + } + return strings.Join(s, " ") +} + +// match reports whether the regexp pattern matches s, +// returning false for a bad regexp after logging the bad regexp. +func match(pattern string, s string) bool { + re := regexp.MustCompile(pattern) + return re.MatchString(s) +} + +// matchAny reports whether the regexp pattern matches any string in list, +// returning false for a bad regexp after logging the bad regexp. +func matchAny(pattern string, list []string) bool { + re := regexp.MustCompile(pattern) + for _, s := range list { + if re.MatchString(s) { + return true + } + } + return false +} + +// matchAny reports the count of matches for the regexp in s, +// returning 0 for a bad regexp after logging the bad regexp. +func matchCount(pattern string, s string) int { + re := regexp.MustCompile(pattern) + return len(re.FindAllString(s, -1)) +} + +// splitLines returns s split into lines, without trailing \n. +func splitLines(s string) []string { + var lines []string + scanner := bufio.NewScanner(strings.NewReader(s)) + for scanner.Scan() { + lines = append(lines, scanner.Text()) + } + return lines +}