Skip to content

Commit

Permalink
Fix: embolden test cases, improve concurrent hashing
Browse files Browse the repository at this point in the history
  • Loading branch information
yunginnanet committed Jun 28, 2024
1 parent 18edaba commit e55e167
Show file tree
Hide file tree
Showing 5 changed files with 142 additions and 50 deletions.
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,8 @@ fmt :
check :
go vet ./...

test : check
go test -v ./...

clean :
rm sandfly-entropyscan || true
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@ module github.com/sandflysecurity/sandfly-entropyscan
go 1.19

require github.com/panjf2000/ants/v2 v2.9.1

require git.tcp.direct/kayos/common v0.9.7 // indirect
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
git.tcp.direct/kayos/common v0.9.7 h1:k2k3fvvEFN9JV+0nyVWLoV8cGRDAhS/8ECO9tEKN+to=
git.tcp.direct/kayos/common v0.9.7/go.mod h1:mmTOIi7k99yygTa1FSOZNoFEEbSTOQV/QpTLUaQU9Tk=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand Down
40 changes: 35 additions & 5 deletions hash.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package main

import (
"bytes"
"crypto/md5"
"crypto/sha1"
"crypto/sha256"
Expand All @@ -10,10 +11,14 @@ import (
"hash"
"io"
"sync"

"git.tcp.direct/kayos/common/pool"
)

type HashType uint8

var bufs = pool.NewBufferFactory()

const (
HashNull HashType = iota
HashTypeMD5
Expand Down Expand Up @@ -81,26 +86,43 @@ func (m *MultiHasher) Hash(r io.Reader) (map[HashType]string, error) {
var (
res = make(map[HashType]string, len(m.todo))
errCh = make(chan error, len(m.todo))
errs = make([]error, 0, len(m.todo))
mu sync.Mutex
)

bigBuf := bufs.Get()
defer bufs.MustPut(bigBuf)

fileN, readErr := bigBuf.ReadFrom(r)
if readErr != nil && (!errors.Is(readErr, io.EOF) && fileN != 0) {
return nil, readErr
}
if fileN == 0 {
return nil, errors.New("no data read")
}

// we avoid reading directly from the reader in case it needs a rewind, and avoid
// repeating potential disk reads by reading once into bigBuf and creating
// [bytes.Reader] instances from its internal []byte slice within the goroutines.
bufRaw := bigBuf.Bytes()

wg := new(sync.WaitGroup)
wg.Add(len(m.todo))

for _, ht := range m.todo {
go func(myHt HashType, myWg *sync.WaitGroup) {
defer myWg.Done()
f, ok := HashEngines[myHt]
if !ok {
panic("hash engine not found: " + ht.String())
panic("hash engine not found: " + myHt.String())
}
h := f()
defer myWg.Done()
buf := getBuf()
defer putBuf(buf)
n, err := io.CopyBuffer(h, r, buf)
n, err := io.CopyBuffer(h, bytes.NewReader(bufRaw), buf)
if err != nil || n == 0 {
if err == nil {
err = errors.New("no data written")
err = errors.New(myHt.String() + ": no data written")
}
errCh <- err
return
Expand All @@ -113,7 +135,15 @@ func (m *MultiHasher) Hash(r io.Reader) (map[HashType]string, error) {

wg.Wait()

return res, nil
close(errCh)

for err := range errCh {
if err != nil {
errs = append(errs, err)
}
}

return res, errors.Join(errs...)
}

func (m *MultiHasher) HashFile(path string) (map[HashType]string, error) {
Expand Down
145 changes: 100 additions & 45 deletions sandfly-entropyscan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,56 +50,114 @@ func TestResultChecksums(t *testing.T) {
Checksums: new(Checksums),
}

results := NewResults()
t.Run("all", func(t *testing.T) {
results := NewResults()

cfg := newConfigFromFlags()
cfg.sumMD5 = true
cfg.sumSHA1 = true
cfg.sumSHA256 = true
cfg.sumSHA512 = true
if err = cfg.runEnabledHashers(yeet); err != nil {
t.Fatalf("unexpected error: %v", err)
}
cfg := newConfigFromFlags()
cfg.hashers = []HashType{HashTypeMD5, HashTypeSHA1, HashTypeSHA256, HashTypeSHA512}

for i, h := range []string{yeet.Checksums.MD5, yeet.Checksums.SHA1, yeet.Checksums.SHA256, yeet.Checksums.SHA512} {
chkName := "md5"
switch i {
case 1:
chkName = "sha1"
case 2:
chkName = "sha256"
case 3:
chkName = "sha512"
if err = cfg.runEnabledHashers(yeet); err != nil {
t.Fatalf("unexpected error: %v", err)
}
if strings.TrimSpace(h) == "" {
t.Errorf("expected %s hash but got empty string", chkName)

for i, h := range []string{yeet.Checksums.MD5, yeet.Checksums.SHA1, yeet.Checksums.SHA256, yeet.Checksums.SHA512} {
chkName := "md5"
switch i {
case 1:
chkName = "sha1"
case 2:
chkName = "sha256"
case 3:
chkName = "sha512"
}
if strings.TrimSpace(h) == "" {
t.Errorf("expected %s hash but got empty string", chkName)
}
// t.Logf("%s: %s", chkName, h)
}
// t.Logf("%s: %s", chkName, h)
}

results.Add(yeet)
results.Add(yeet)

t.Run("csv", func(t *testing.T) {
expected := []byte("filename,path,entropy,elf_file,md5,sha1,sha256,sha512\n" +
"yeet," + path + "," + "0.50,false," + yeet.Checksums.MD5 + "," +
yeet.Checksums.SHA1 + "," + yeet.Checksums.SHA256 + "," +
yeet.Checksums.SHA512 + "\n",
)

result, err := results.MarshalCSV()

if err != nil {
t.Errorf("\n\nunexpected error:\n %v", err)
}

if !strings.EqualFold(string(result), string(expected)) {
t.Errorf("\n\nexpected:\n"+
"%s \n"+
"got: \n"+
"%s\n\n",
string(expected),
string(result),
)
}
})
})

expected := []byte("filename,path,entropy,elf_file,md5,sha1,sha256,sha512\n" +
"yeet," + path + "," + "0.50,false," + yeet.Checksums.MD5 + "," +
yeet.Checksums.SHA1 + "," + yeet.Checksums.SHA256 + "," +
yeet.Checksums.SHA512 + "\n",
)
t.Run("some", func(t *testing.T) {
yeet.Checksums = new(Checksums)
results := NewResults()

result, err := results.MarshalCSV()
cfg := newConfigFromFlags()
cfg.hashers = []HashType{HashTypeMD5, HashTypeSHA1}

if err != nil {
t.Errorf("\n\nunexpected error:\n %v", err)
}
if err = cfg.runEnabledHashers(yeet); err != nil {
t.Fatalf("unexpected error: %v", err)
}

if !strings.EqualFold(string(result), string(expected)) {
t.Errorf("\n\nexpected:\n"+
"%s \n"+
"got: \n"+
"%s\n\n",
string(expected),
string(result),
)
}
for i, h := range []string{yeet.Checksums.MD5, yeet.Checksums.SHA1, yeet.Checksums.SHA256, yeet.Checksums.SHA512} {
chkName := "md5"
switch i {
case 1:
chkName = "sha1"
case 2:
chkName = "sha256"
case 3:
chkName = "sha512"
}
if (i < 2) && strings.TrimSpace(h) == "" {
t.Errorf("expected %s hash but got empty string", chkName)
}
if i > 2 && strings.TrimSpace(h) != "" {
t.Errorf("expected empty string but got %s", h)
}
}

results.Add(yeet)

t.Run("csv", func(t *testing.T) {
expected := []byte("filename,path,entropy,elf_file,md5,sha1,sha256,sha512\n" +
"yeet," + path + "," + "0.50,false," + yeet.Checksums.MD5 + "," +
yeet.Checksums.SHA1 + "," + "" + "," +
"" + "\n",
)

result, err := results.MarshalCSV()

if err != nil {
t.Errorf("\n\nunexpected error:\n %v", err)
}

if !strings.EqualFold(string(result), string(expected)) {
t.Errorf("\n\nexpected:\n"+
"%s \n"+
"got: \n"+
"%s\n\n",
string(expected),
string(result),
)
}
})
})
}

func TestResultsCustomSchema(t *testing.T) {
Expand Down Expand Up @@ -272,10 +330,7 @@ func TestParseNonNilPointer(t *testing.T) {
func TestJSONCSVParityAndCheckOwnPID(t *testing.T) {
csv := defCSVHeader
cfg := newConfigFromFlags()
cfg.sumMD5 = true
cfg.sumSHA1 = true
cfg.sumSHA256 = true
cfg.sumSHA512 = true
cfg.hashers = []HashType{HashTypeMD5, HashTypeSHA1, HashTypeSHA256, HashTypeSHA512}

myPID := os.Getpid()
procfsTarget := filepath.Join(constProcDir, strconv.Itoa(myPID), "/exe")
Expand Down

0 comments on commit e55e167

Please sign in to comment.