From ad2159eda8b967193b6b31b0f86856d8be3f78b4 Mon Sep 17 00:00:00 2001
From: Silentroar <tujiayu@tujiayudeMacBook-Pro.local>
Date: Sat, 4 Jun 2022 16:22:55 +0800
Subject: [PATCH] fix(#40): correct word count in test and implement with
 bufio.ScanWords

---
 ex7.1/counter.go      | 74 +++++++++++--------------------------------
 ex7.1/counter_test.go | 12 +++----
 2 files changed, 24 insertions(+), 62 deletions(-)

diff --git a/ex7.1/counter.go b/ex7.1/counter.go
index d0bda56..445eee3 100644
--- a/ex7.1/counter.go
+++ b/ex7.1/counter.go
@@ -2,9 +2,8 @@
 package counter
 
 import (
+	"bufio"
 	"fmt"
-	"unicode"
-	"unicode/utf8"
 )
 
 type LineCounter struct {
@@ -12,10 +11,14 @@ type LineCounter struct {
 }
 
 func (c *LineCounter) Write(p []byte) (n int, err error) {
-	for _, b := range p {
-		if b == '\n' {
-			c.lines++
+	for i, advance, atEOF := 0, 0, false; i < len(p); i += advance {
+		var token []byte
+		advance, token, _ = bufio.ScanLines(p[i:], atEOF)
+		if token == nil {
+			atEOF = true
+			continue
 		}
+		c.lines++
 	}
 	return len(p), nil
 }
@@ -33,60 +36,19 @@ type WordCounter struct {
 	inWord bool
 }
 
-func leadingSpaces(p []byte) int {
-	count := 0
-	cur := 0
-	for cur < len(p) {
-		r, size := utf8.DecodeRune(p[cur:])
-		if !unicode.IsSpace(r) {
-			return count
-		}
-		cur += size
-		count++
-	}
-	return count
-}
-
-func leadingNonSpaces(p []byte) int {
-	count := 0
-	cur := 0
-	for cur < len(p) {
-		r, size := utf8.DecodeRune(p[cur:])
-		if unicode.IsSpace(r) {
-			return count
-		}
-		cur += size
-		count++
-	}
-	return count
-}
-
-// A !IsSpace() -> IsSpace() transition is counted as a word.
-//
-// I couldn't figure out how to use bufio.ScanWords without either
-// double-counting words split across buffer boundaries, giving incorrect
-// intermediate counts, or doing some really awkward buffer manipulation.
 func (c *WordCounter) Write(p []byte) (n int, err error) {
-	cur := 0
-	n = len(p)
-	for {
-		spaces := leadingSpaces(p[cur:])
-		cur += spaces
-		if spaces > 0 {
-			c.inWord = false
-		}
-		if cur == len(p) {
-			return
-		}
-		if !c.inWord {
-			c.words++
-		}
-		c.inWord = true
-		cur += leadingNonSpaces(p[cur:])
-		if cur == len(p) {
-			return
+	for i, advance, atEOF := 0, 0, false; i < len(p); i += advance {
+		var token []byte
+		advance, token, _ = bufio.ScanWords(p[i:], atEOF)
+		// bufio.ScanWords returns a nil token for a trailing, incomplete word
+		// while atEOF is false, so request that final word explicitly with atEOF set to true
+		if token == nil {
+			atEOF = true
+			continue
 		}
+		c.words++
 	}
+	return len(p), nil
 }
 
 func (c *WordCounter) N() int {
diff --git a/ex7.1/counter_test.go b/ex7.1/counter_test.go
index 23c18f1..2a8fa71 100644
--- a/ex7.1/counter_test.go
+++ b/ex7.1/counter_test.go
@@ -24,10 +24,10 @@ func TestLineCounter(t *testing.T) {
 func TestWordCounter(t *testing.T) {
 	c := &WordCounter{}
 	data := [][]byte{
-		[]byte("The upcoming word is sp"),
-		[]byte("lit across the buffer boundary. "),
-		[]byte(" And this one ends on the buffer boundary."),
-		[]byte(" Last words."),
+		[]byte("The upcoming word is sp"),                    // 5
+		[]byte("lit across the buffer boundary. "),           // 5
+		[]byte(" And this one ends on the buffer boundary."), // 8
+		[]byte(" Last words."),                               // 2
 	}
 	for _, p := range data {
 		n, err := c.Write(p)
@@ -36,8 +36,8 @@ func TestWordCounter(t *testing.T) {
 			t.Fail()
 		}
 	}
-	if c.N() != 19 {
-		t.Logf("words: %d != 19", c.N())
+	if c.N() != 20 {
+		t.Logf("words: %d != 20", c.N())
 		t.Fail()
 	}
 }