From ad2159eda8b967193b6b31b0f86856d8be3f78b4 Mon Sep 17 00:00:00 2001 From: Silentroar Date: Sat, 4 Jun 2022 16:22:55 +0800 Subject: [PATCH] fix(#40): correct words count in test and implement with bufio.ScanWords --- ex7.1/counter.go | 74 +++++++++++-------------------------------- ex7.1/counter_test.go | 12 +++---- 2 files changed, 24 insertions(+), 62 deletions(-) diff --git a/ex7.1/counter.go b/ex7.1/counter.go index d0bda56..445eee3 100644 --- a/ex7.1/counter.go +++ b/ex7.1/counter.go @@ -2,9 +2,8 @@ package counter import ( + "bufio" "fmt" - "unicode" - "unicode/utf8" ) type LineCounter struct { @@ -12,10 +11,14 @@ type LineCounter struct { } func (c *LineCounter) Write(p []byte) (n int, err error) { - for _, b := range p { - if b == '\n' { - c.lines++ + for i, advance, atEOF := 0, 0, false; i < len(p); i += advance { + var token []byte + advance, token, _ = bufio.ScanLines(p[i:], atEOF) + if token == nil { + atEOF = true + continue } + c.lines++ } return len(p), nil } @@ -33,60 +36,19 @@ type WordCounter struct { inWord bool } -func leadingSpaces(p []byte) int { - count := 0 - cur := 0 - for cur < len(p) { - r, size := utf8.DecodeRune(p[cur:]) - if !unicode.IsSpace(r) { - return count - } - cur += size - count++ - } - return count -} - -func leadingNonSpaces(p []byte) int { - count := 0 - cur := 0 - for cur < len(p) { - r, size := utf8.DecodeRune(p[cur:]) - if unicode.IsSpace(r) { - return count - } - cur += size - count++ - } - return count -} - -// A !IsSpace() -> IsSpace() transition is counted as a word. -// -// I couldn't figure out how to use bufio.ScanWords without either -// double-counting words split across buffer boundaries, giving incorrect -// intermediate counts, or doing some really awkward buffer manipulation. func (c *WordCounter) Write(p []byte) (n int, err error) { - cur := 0 - n = len(p) - for { - spaces := leadingSpaces(p[cur:]) - cur += spaces - if spaces > 0 { - c.inWord = false - } - if cur == len(p) { - return - } - if !c.inWord { - c.words++ - } - c.inWord = true - cur += leadingNonSpaces(p[cur:]) - if cur == len(p) { - return + for i, advance, atEOF := 0, 0, false; i < len(p); i += advance { + var token []byte + advance, token, _ = bufio.ScanWords(p[i:], atEOF) + // according to the source code of bufio.ScanWords, + // we should request the final, incomplete word explicitly with atEOF set to true + if token == nil { + atEOF = true + continue } + c.words++ } + return len(p), nil } func (c *WordCounter) N() int { diff --git a/ex7.1/counter_test.go b/ex7.1/counter_test.go index 23c18f1..2a8fa71 100644 --- a/ex7.1/counter_test.go +++ b/ex7.1/counter_test.go @@ -24,10 +24,10 @@ func TestLineCounter(t *testing.T) { func TestWordCounter(t *testing.T) { c := &WordCounter{} data := [][]byte{ - []byte("The upcoming word is sp"), - []byte("lit across the buffer boundary. "), - []byte(" And this one ends on the buffer boundary."), - []byte(" Last words."), + []byte("The upcoming word is sp"), // 5 + []byte("lit across the buffer boundary. "), // 5 + []byte(" And this one ends on the buffer boundary."), // 8 + []byte(" Last words."), // 2 } for _, p := range data { n, err := c.Write(p) @@ -36,8 +36,8 @@ func TestWordCounter(t *testing.T) { t.Fail() } } - if c.N() != 19 { - t.Logf("words: %d != 19", c.N()) + if c.N() != 20 { + t.Logf("words: %d != 20", c.N()) t.Fail() } }