From e55e167cf4525a3d9483a1b9ff46b4c3edbdf05a Mon Sep 17 00:00:00 2001 From: "kayos@tcp.direct" Date: Thu, 27 Jun 2024 21:49:41 -0700 Subject: [PATCH] Fix: embolden test cases, improve concurrent hashing --- Makefile | 3 + go.mod | 2 + go.sum | 2 + hash.go | 40 ++++++++-- sandfly-entropyscan_test.go | 145 +++++++++++++++++++++++++----------- 5 files changed, 142 insertions(+), 50 deletions(-) diff --git a/Makefile b/Makefile index 772dade..79581fb 100644 --- a/Makefile +++ b/Makefile @@ -9,5 +9,8 @@ fmt : check : go vet ./... +test : check + go test -v ./... + clean : rm sandfly-entropyscan || true diff --git a/go.mod b/go.mod index 799090e..7b38dc7 100644 --- a/go.mod +++ b/go.mod @@ -3,3 +3,5 @@ module github.com/sandflysecurity/sandfly-entropyscan go 1.19 require github.com/panjf2000/ants/v2 v2.9.1 + +require git.tcp.direct/kayos/common v0.9.7 // indirect diff --git a/go.sum b/go.sum index 520294c..f58c364 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +git.tcp.direct/kayos/common v0.9.7 h1:k2k3fvvEFN9JV+0nyVWLoV8cGRDAhS/8ECO9tEKN+to= +git.tcp.direct/kayos/common v0.9.7/go.mod h1:mmTOIi7k99yygTa1FSOZNoFEEbSTOQV/QpTLUaQU9Tk= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= diff --git a/hash.go b/hash.go index c604692..c7c6017 100644 --- a/hash.go +++ b/hash.go @@ -1,6 +1,7 @@ package main import ( + "bytes" "crypto/md5" "crypto/sha1" "crypto/sha256" @@ -10,10 +11,14 @@ import ( "hash" "io" "sync" + + "git.tcp.direct/kayos/common/pool" ) type HashType uint8 +var bufs = pool.NewBufferFactory() + const ( HashNull HashType = iota HashTypeMD5 @@ -81,26 +86,43 @@ func (m *MultiHasher) Hash(r io.Reader) (map[HashType]string, error) { var ( res = make(map[HashType]string, len(m.todo)) errCh = make(chan error, len(m.todo)) + errs = make([]error, 0, len(m.todo)) 
mu sync.Mutex ) + bigBuf := bufs.Get() + defer bufs.MustPut(bigBuf) + + fileN, readErr := bigBuf.ReadFrom(r) + if readErr != nil && (!errors.Is(readErr, io.EOF) && fileN != 0) { + return nil, readErr + } + if fileN == 0 { + return nil, errors.New("no data read") + } + + // we avoid reading directly from the reader in case it needs a rewind and avoid + // repeating potential disk reads by reading once into bigBuf and creating + // [bytes.Reader] instances from its internal []byte slice within the goroutines. + bufRaw := bigBuf.Bytes() + wg := new(sync.WaitGroup) wg.Add(len(m.todo)) for _, ht := range m.todo { go func(myHt HashType, myWg *sync.WaitGroup) { + defer myWg.Done() f, ok := HashEngines[myHt] if !ok { - panic("hash engine not found: " + ht.String()) + panic("hash engine not found: " + myHt.String()) } h := f() - defer myWg.Done() buf := getBuf() defer putBuf(buf) - n, err := io.CopyBuffer(h, r, buf) + n, err := io.CopyBuffer(h, bytes.NewReader(bufRaw), buf) if err != nil || n == 0 { if err == nil { - err = errors.New("no data written") + err = errors.New(myHt.String() + ": no data written") } errCh <- err return @@ -113,7 +135,15 @@ func (m *MultiHasher) Hash(r io.Reader) (map[HashType]string, error) { wg.Wait() - return res, nil + close(errCh) + + for err := range errCh { + if err != nil { + errs = append(errs, err) + } + } + + return res, errors.Join(errs...) 
} func (m *MultiHasher) HashFile(path string) (map[HashType]string, error) { diff --git a/sandfly-entropyscan_test.go b/sandfly-entropyscan_test.go index af040f8..d38a588 100644 --- a/sandfly-entropyscan_test.go +++ b/sandfly-entropyscan_test.go @@ -50,56 +50,114 @@ func TestResultChecksums(t *testing.T) { Checksums: new(Checksums), } - results := NewResults() + t.Run("all", func(t *testing.T) { + results := NewResults() - cfg := newConfigFromFlags() - cfg.sumMD5 = true - cfg.sumSHA1 = true - cfg.sumSHA256 = true - cfg.sumSHA512 = true - if err = cfg.runEnabledHashers(yeet); err != nil { - t.Fatalf("unexpected error: %v", err) - } + cfg := newConfigFromFlags() + cfg.hashers = []HashType{HashTypeMD5, HashTypeSHA1, HashTypeSHA256, HashTypeSHA512} - for i, h := range []string{yeet.Checksums.MD5, yeet.Checksums.SHA1, yeet.Checksums.SHA256, yeet.Checksums.SHA512} { - chkName := "md5" - switch i { - case 1: - chkName = "sha1" - case 2: - chkName = "sha256" - case 3: - chkName = "sha512" + if err = cfg.runEnabledHashers(yeet); err != nil { + t.Fatalf("unexpected error: %v", err) } - if strings.TrimSpace(h) == "" { - t.Errorf("expected %s hash but got empty string", chkName) + + for i, h := range []string{yeet.Checksums.MD5, yeet.Checksums.SHA1, yeet.Checksums.SHA256, yeet.Checksums.SHA512} { + chkName := "md5" + switch i { + case 1: + chkName = "sha1" + case 2: + chkName = "sha256" + case 3: + chkName = "sha512" + } + if strings.TrimSpace(h) == "" { + t.Errorf("expected %s hash but got empty string", chkName) + } + // t.Logf("%s: %s", chkName, h) } - // t.Logf("%s: %s", chkName, h) - } - results.Add(yeet) + results.Add(yeet) + + t.Run("csv", func(t *testing.T) { + expected := []byte("filename,path,entropy,elf_file,md5,sha1,sha256,sha512\n" + + "yeet," + path + "," + "0.50,false," + yeet.Checksums.MD5 + "," + + yeet.Checksums.SHA1 + "," + yeet.Checksums.SHA256 + "," + + yeet.Checksums.SHA512 + "\n", + ) + + result, err := results.MarshalCSV() + + if err != nil { + 
t.Errorf("\n\nunexpected error:\n %v", err) + } + + if !strings.EqualFold(string(result), string(expected)) { + t.Errorf("\n\nexpected:\n"+ + "%s \n"+ + "got: \n"+ + "%s\n\n", + string(expected), + string(result), + ) + } + }) + }) - expected := []byte("filename,path,entropy,elf_file,md5,sha1,sha256,sha512\n" + - "yeet," + path + "," + "0.50,false," + yeet.Checksums.MD5 + "," + - yeet.Checksums.SHA1 + "," + yeet.Checksums.SHA256 + "," + - yeet.Checksums.SHA512 + "\n", - ) + t.Run("some", func(t *testing.T) { + yeet.Checksums = new(Checksums) + results := NewResults() - result, err := results.MarshalCSV() + cfg := newConfigFromFlags() + cfg.hashers = []HashType{HashTypeMD5, HashTypeSHA1} - if err != nil { - t.Errorf("\n\nunexpected error:\n %v", err) - } + if err = cfg.runEnabledHashers(yeet); err != nil { + t.Fatalf("unexpected error: %v", err) + } - if !strings.EqualFold(string(result), string(expected)) { - t.Errorf("\n\nexpected:\n"+ - "%s \n"+ - "got: \n"+ - "%s\n\n", - string(expected), - string(result), - ) - } + for i, h := range []string{yeet.Checksums.MD5, yeet.Checksums.SHA1, yeet.Checksums.SHA256, yeet.Checksums.SHA512} { + chkName := "md5" + switch i { + case 1: + chkName = "sha1" + case 2: + chkName = "sha256" + case 3: + chkName = "sha512" + } + if (i < 2) && strings.TrimSpace(h) == "" { + t.Errorf("expected %s hash but got empty string", chkName) + } + if i > 2 && strings.TrimSpace(h) != "" { + t.Errorf("expected empty string but got %s", h) + } + } + + results.Add(yeet) + + t.Run("csv", func(t *testing.T) { + expected := []byte("filename,path,entropy,elf_file,md5,sha1,sha256,sha512\n" + + "yeet," + path + "," + "0.50,false," + yeet.Checksums.MD5 + "," + + yeet.Checksums.SHA1 + "," + "" + "," + + "" + "\n", + ) + + result, err := results.MarshalCSV() + + if err != nil { + t.Errorf("\n\nunexpected error:\n %v", err) + } + + if !strings.EqualFold(string(result), string(expected)) { + t.Errorf("\n\nexpected:\n"+ + "%s \n"+ + "got: \n"+ + "%s\n\n", + 
string(expected), + string(result), + ) + } + }) + }) } func TestResultsCustomSchema(t *testing.T) { @@ -272,10 +330,7 @@ func TestParseNonNilPointer(t *testing.T) { func TestJSONCSVParityAndCheckOwnPID(t *testing.T) { csv := defCSVHeader cfg := newConfigFromFlags() - cfg.sumMD5 = true - cfg.sumSHA1 = true - cfg.sumSHA256 = true - cfg.sumSHA512 = true + cfg.hashers = []HashType{HashTypeMD5, HashTypeSHA1, HashTypeSHA256, HashTypeSHA512} myPID := os.Getpid() procfsTarget := filepath.Join(constProcDir, strconv.Itoa(myPID), "/exe")