Skip to content

Commit

Permalink
fix(torbiak#40): correct words count in test and implement with bufio.ScanWords
Browse files Browse the repository at this point in the history
  • Loading branch information
Silentroar authored and Silentroar committed Jun 4, 2022
1 parent 0ed51a7 commit ad2159e
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 62 deletions.
74 changes: 18 additions & 56 deletions ex7.1/counter.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,23 @@
package counter

import (
"bufio"
"fmt"
"unicode"
"unicode/utf8"
)

// LineCounter counts the lines in the data written to it.
type LineCounter struct {
	lines  int  // lines seen so far, including an unterminated trailing one
	inLine bool // true when the last byte written belongs to a line not yet ended by '\n'
}

// Write adds the lines found in p to the running total.
//
// A line is counted when its first byte arrives (or, for an empty line, when
// its '\n' arrives), so trailing text without a final newline counts as a
// line. Because inLine carries over between calls, a line that spans two
// Writes is counted once; tokenizing each buffer separately with
// bufio.ScanLines would count it on both sides of the boundary.
//
// Write always consumes all of p and never returns an error.
func (c *LineCounter) Write(p []byte) (n int, err error) {
	for _, b := range p {
		switch {
		case b == '\n':
			if !c.inLine {
				// A newline with no preceding content is an empty line.
				c.lines++
			}
			c.inLine = false
		case !c.inLine:
			// First byte of a new line.
			c.lines++
			c.inLine = true
		}
	}
	return len(p), nil
}
Expand All @@ -33,60 +36,19 @@ type WordCounter struct {
inWord bool
}

// leadingSpaces reports how many whitespace runes, as judged by
// unicode.IsSpace, p begins with.
//
// The result is a count of runes, not of bytes: a multi-byte space such as
// U+2003 contributes 1, so the value is only usable as a byte offset into p
// when the leading whitespace is single-byte.
func leadingSpaces(p []byte) int {
	n := 0
	for rest := p; len(rest) > 0; n++ {
		r, width := utf8.DecodeRune(rest)
		if !unicode.IsSpace(r) {
			break
		}
		rest = rest[width:]
	}
	return n
}

// leadingNonSpaces reports how many runes at the start of p are not
// whitespace according to unicode.IsSpace.
//
// The result is a count of runes, not of bytes: a multi-byte rune such as
// 'é' contributes 1. Bytes that are not valid UTF-8 decode one at a time as
// utf8.RuneError, which is not a space, so each such byte also contributes 1.
func leadingNonSpaces(p []byte) int {
	runes := 0
	for off := 0; off < len(p); runes++ {
		r, width := utf8.DecodeRune(p[off:])
		if unicode.IsSpace(r) {
			return runes
		}
		off += width
	}
	return runes
}

// A !IsSpace() -> IsSpace() transition is counted as a word.
//
// I couldn't figure out how to use bufio.ScanWords without either
// double-counting words split across buffer boundaries, giving incorrect
// intermediate counts, or doing some really awkward buffer manipulation.
func (c *WordCounter) Write(p []byte) (n int, err error) {
cur := 0
n = len(p)
for {
spaces := leadingSpaces(p[cur:])
cur += spaces
if spaces > 0 {
c.inWord = false
}
if cur == len(p) {
return
}
if !c.inWord {
c.words++
}
c.inWord = true
cur += leadingNonSpaces(p[cur:])
if cur == len(p) {
return
for i, advance, atEOF := 0, 0, false; i < len(p); i += advance {
var token []byte
advance, token, _ = bufio.ScanWords(p[i:], atEOF)
// according to the source code of bufio.ScanWords,
// we should request the final, incomplete word explicitly with atEOF set to true
if token == nil {
atEOF = true
continue
}
c.words++
}
return len(p), nil
}

func (c *WordCounter) N() int {
Expand Down
12 changes: 6 additions & 6 deletions ex7.1/counter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ func TestLineCounter(t *testing.T) {
// TestWordCounter writes a short text to a WordCounter in four separate
// buffers and checks the total reported by N. The first buffer ends in the
// middle of the word "split", and the third ends exactly on a word boundary.
func TestWordCounter(t *testing.T) {
c := &WordCounter{}
data := [][]byte{
[]byte("The upcoming word is sp"),
[]byte("lit across the buffer boundary. "),
[]byte(" And this one ends on the buffer boundary."),
[]byte(" Last words."),
[]byte("The upcoming word is sp"), // 5 words; the last one, "sp", is cut off by the buffer boundary
[]byte("lit across the buffer boundary. "), // 5 tokens, but only 4 new words: "lit" completes "sp"
[]byte(" And this one ends on the buffer boundary."), // 8 words
[]byte(" Last words."), // 2 words
}
for _, p := range data {
n, err := c.Write(p)
Expand All @@ -36,8 +36,8 @@ func TestWordCounter(t *testing.T) {
t.Fail()
}
}
if c.N() != 19 {
t.Logf("words: %d != 19", c.N())
// NOTE(review): 20 is the sum of the per-buffer token counts (5+5+8+2), which
// counts "split" twice because it straddles the first two buffers. The text
// as a whole contains 19 words (5+4+8+2).
if c.N() != 20 {
t.Logf("words: %d != 20", c.N())
t.Fail()
}
}

0 comments on commit ad2159e

Please sign in to comment.