luno · andrewwormald · Mar 10, 2026 · Mar 10, 2026 · Mar 10, 2026 · coderabbitai
diff --git a/errors.go b/errors.go
@@ -10,9 +10,11 @@ var (
 	ErrInvalidTransition    = errors.New("invalid transition")
 )
 
-// ErrorCounter defines an interface for counting occurrences of errors with optional labels.
+// ErrorCounter defines an interface for counting occurrences of errors keyed by stable labels.
+// At least one label is required — labels should identify the process and run (e.g. processName, runID).
+// The error value is not used for keying because error messages often contain dynamic data.
 type ErrorCounter interface {
-	Add(err error, labels ...string) int
-	Count(err error, labels ...string) int
-	Clear(err error, labels ...string)
+	Add(err error, label string, extras ...string) int
+	Count(err error, label string, extras ...string) int
+	Clear(err error, label string, extras ...string)
 }
diff --git a/internal/errorcounter/errorcounter.go b/internal/errorcounter/errorcounter.go
@@ -16,31 +16,37 @@ type Counter struct {
 	store map[string]int
 }
 
-func (c *Counter) Add(err error, labels ...string) int {
+func (c *Counter) Add(err error, label string, extras ...string) int {
 	c.mu.Lock()
 	defer c.mu.Unlock()
 
-	errMsg := err.Error()
-	errMsg += strings.Join(labels, "-")
-	c.store[errMsg] += 1
-	return c.store[errMsg]
+	key := makeKey(label, extras)
+	c.store[key] += 1
+	return c.store[key]
 }
 
-func (c *Counter) Count(err error, labels ...string) int {
+func (c *Counter) Count(err error, label string, extras ...string) int {
 	c.mu.Lock()
 	defer c.mu.Unlock()
 
-	errMsg := err.Error()
-	errMsg += strings.Join(labels, "-")
-	return c.store[errMsg]
+	key := makeKey(label, extras)
+	return c.store[key]
 }
 
-func (c *Counter) Clear(err error, labels ...string) {
+func (c *Counter) Clear(err error, label string, extras ...string) {
 	c.mu.Lock()
 	defer c.mu.Unlock()
 
-	errMsg := err.Error()
-	errMsg += strings.Join(labels, "-")
-	c.store[errMsg] = 0
-	return
+	key := makeKey(label, extras)
+	delete(c.store, key)
+}
+
+// makeKey builds a stable key from labels only. The error message is excluded
+// because it often contains dynamic data (timestamps, IDs) which would create
+// unique keys and prevent the PauseAfterErrCount threshold from ever being reached.
+func makeKey(label string, extras []string) string {
+	if len(extras) == 0 {
+		return label
+	}
+	return label + "-" + strings.Join(extras, "-")
 }
diff --git a/internal/errorcounter/errorcounter_test.go b/internal/errorcounter/errorcounter_test.go
@@ -10,52 +10,77 @@ import (
 )
 
 func TestErrorCounter(t *testing.T) {
-	testCases := []struct {
-		name           string
-		inputErr       error
-		labels         []string
-		iterationCount int
-		expectedCount  int
-	}{
-		{
-			name:           "Add 3 and get 3",
-			inputErr:       errors.New("test error"),
-			labels:         []string{"label 1", "label 2"},
-			iterationCount: 3,
-			expectedCount:  3,
-		},
-		{
-			name:           "Add 1 and get 1 - no labels",
-			inputErr:       errors.New("test error"),
-			labels:         []string{},
-			iterationCount: 3,
-			expectedCount:  3,
-		},
-		{
-			name:           "Add 0 and get 0",
-			inputErr:       errors.New("test error"),
-			labels:         []string{"label 1"},
-			iterationCount: 0,
-			expectedCount:  0,
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(tc.name, func(t *testing.T) {
-			c := errorcounter.New()
-
-			var currentCount int
-			for i := 0; i < tc.iterationCount; i++ {
-				currentCount = c.Add(tc.inputErr, tc.labels...)
-			}
-			require.Equal(t, tc.expectedCount, currentCount)
-
-			count := c.Count(tc.inputErr, tc.labels...)
-			require.Equal(t, tc.expectedCount, count)
-
-			c.Clear(tc.inputErr, tc.labels...)
-			count = c.Count(tc.inputErr, tc.labels...)
-			require.Equal(t, 0, count)
-		})
-	}
+	t.Run("Add 3 and get 3", func(t *testing.T) {
+		c := errorcounter.New()
+		err := errors.New("test error")
+
+		c.Add(err, "label 1", "label 2")
+		c.Add(err, "label 1", "label 2")
+		count := c.Add(err, "label 1", "label 2")
+		require.Equal(t, 3, count)
+
+		require.Equal(t, 3, c.Count(err, "label 1", "label 2"))
+
+		c.Clear(err, "label 1", "label 2")
+		require.Equal(t, 0, c.Count(err, "label 1", "label 2"))
+	})
+
+	t.Run("Single label", func(t *testing.T) {
+		c := errorcounter.New()
+		err := errors.New("test error")
+
+		c.Add(err, "only-label")
+		count := c.Add(err, "only-label")
+		require.Equal(t, 2, count)
+
+		require.Equal(t, 2, c.Count(err, "only-label"))
+
+		c.Clear(err, "only-label")
+		require.Equal(t, 0, c.Count(err, "only-label"))
+	})
+
+	t.Run("Add 0 and get 0", func(t *testing.T) {
+		c := errorcounter.New()
+		require.Equal(t, 0, c.Count(errors.New("test error"), "label 1"))
+	})
+}
+
+func TestErrorCounter_DifferentErrorsSameLabels(t *testing.T) {
+	c := errorcounter.New()
+
+	// Different error messages with the same labels should share a counter.
+	c.Add(errors.New("connection refused at 10:00:01"), "processName", "run-123")
+	c.Add(errors.New("connection refused at 10:00:02"), "processName", "run-123")
+	count := c.Add(errors.New("timeout after 30s"), "processName", "run-123")
+
+	require.Equal(t, 3, count)
+
+	// Count should work regardless of which error is passed.
+	require.Equal(t, 3, c.Count(errors.New("completely different error"), "processName", "run-123"))
+}
+
+func TestErrorCounter_ClearRemovesKey(t *testing.T) {
+	c := errorcounter.New()
+
+	c.Add(errors.New("err"), "process", "run-1")
+	c.Add(errors.New("err"), "process", "run-1")
+	require.Equal(t, 2, c.Count(errors.New("err"), "process", "run-1"))
+
+	c.Clear(errors.New("err"), "process", "run-1")
+
+	// After clear, count should be 0 and next Add should return 1.
+	require.Equal(t, 0, c.Count(errors.New("err"), "process", "run-1"))
+	require.Equal(t, 1, c.Add(errors.New("err"), "process", "run-1"))
+}
+
+func TestErrorCounter_DifferentLabelsSeparateCounters(t *testing.T) {
+	c := errorcounter.New()
+	err := errors.New("same error")
+
+	c.Add(err, "process-a", "run-1")
+	c.Add(err, "process-a", "run-1")
+	c.Add(err, "process-b", "run-2")
+
+	require.Equal(t, 2, c.Count(err, "process-a", "run-1"))
+	require.Equal(t, 1, c.Count(err, "process-b", "run-2"))
 }