-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Always copy PHP and legacy CLI files if they have changed #94
base: main
Are you sure you want to change the base?
Changes from 2 commits
7c90b25
f19ce9d
f0a1218
3a43a7c
eede8ef
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
package file | ||
|
||
import ( | ||
"bytes" | ||
"crypto/sha256" | ||
"encoding/hex" | ||
"fmt" | ||
"io" | ||
"os" | ||
|
||
"github.com/spf13/afero" | ||
pjcdawkins marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) | ||
|
||
const hashExt = ".sha256" | ||
|
||
// Copy copies a file from the given bytes to destination. | ||
func Copy(destination string, fin []byte) error { | ||
if _, err := os.Stat(destination); err != nil && !os.IsNotExist(err) { | ||
pjcdawkins marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return fmt.Errorf("could not stat file: %w", err) | ||
} | ||
|
||
fout, err := os.Create(destination) | ||
if err != nil { | ||
return fmt.Errorf("could not create file: %w", err) | ||
} | ||
defer fout.Close() | ||
|
||
r := bytes.NewReader(fin) | ||
|
||
if _, err := io.Copy(fout, r); err != nil { | ||
return fmt.Errorf("could copy file: %w", err) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// CheckSize checks if a file exists and has an exact size. | ||
func CheckSize(filename string, size int) (bool, error) { | ||
stat, err := os.Stat(filename) | ||
if err != nil { | ||
if os.IsNotExist(err) { | ||
return false, nil | ||
} | ||
return false, fmt.Errorf("could not stat file: %w", err) | ||
} | ||
return stat.Size() == int64(size), nil | ||
} | ||
|
||
var testableFS = afero.NewOsFs() | ||
|
||
// CheckHash checks if a file has the given SHA256 hash. | ||
// It supports reading the file's current hash from a static file saved next to it with the hashExt extension. | ||
func CheckHash(filename, hash string) (bool, error) { | ||
if fh, err := afero.ReadFile(testableFS, filename+hashExt); err == nil { | ||
return string(fh) == hash, nil | ||
} | ||
fh, err := sha256Sum(testableFS, filename) | ||
if err != nil { | ||
if os.IsNotExist(err) { | ||
return false, nil | ||
} | ||
return true, err | ||
} | ||
return fh == hash, nil | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The hash should be computed from the file itself, otherwise the hash can be tampered in the same way that the file was tampered. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not used for tamper protection as you say - the pregenerated hashes could almost just be random numbers - I went for real hashes just so there's the possibility of checking against accidental corruption of the copied files themselves and the fallback of being able to recompute when the hash file wasn't copied. But broadly I was going for not having to compute hashes at runtime at all. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Recomputing the hash from the file on every run would theoretically use time and memory that we don't want or need to use - the purpose of this PR is to make sure we have updated files when we change them, the user is free to break their own installation (and it can be fixed again by deleting the main file or hash). |
||
|
||
func SaveHash(filename, hash string) error { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's no need to maintain the hash, as the file containing the hash can be tampered with as well. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. see other comments #94 (comment) |
||
return Copy(filename+hashExt, []byte(hash)) | ||
} | ||
|
||
// sha256Sum calculates the SHA256 hash of a file. | ||
func sha256Sum(fs afero.Fs, filename string) (string, error) { | ||
f, err := fs.Open(filename) | ||
if err != nil { | ||
return "", err | ||
} | ||
defer f.Close() | ||
h := sha256.New() | ||
if _, err := io.Copy(h, f); err != nil { | ||
return "", err | ||
} | ||
return hex.EncodeToString(h.Sum(nil)), nil | ||
} | ||
|
||
// CopyIfChanged copies source data to a destination filename if it has changed. | ||
func CopyIfChanged(destFilename string, source []byte, sourceHash string) error { | ||
sizeOK, err := CheckSize(destFilename, len(source)) | ||
if err != nil { | ||
return err | ||
} | ||
if sizeOK { | ||
hashOK, err := CheckHash(destFilename, sourceHash) | ||
if hashOK || err != nil { | ||
return err | ||
} | ||
} | ||
if err := Copy(destFilename, source); err != nil { | ||
return err | ||
} | ||
return SaveHash(destFilename, sourceHash) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
package file | ||
|
||
import ( | ||
"os" | ||
"testing" | ||
|
||
"github.com/spf13/afero" | ||
"github.com/stretchr/testify/assert" | ||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func TestCheckHash(t *testing.T) { | ||
// Temporarily swap to a memory filesystem. | ||
testableFS = afero.NewMemMapFs() | ||
defer func() { | ||
testableFS = afero.NewOsFs() | ||
}() | ||
fs := testableFS | ||
|
||
mockContent := "hello world\n" | ||
mockContentHash := "a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447" | ||
diffContent := "hello world?\n" | ||
diffContentHash := "d441ffff4b6663c3f150bda9c519a58c0685e34cf13d26e881d7e004f704eeba" | ||
|
||
cases := []struct { | ||
name string | ||
content string | ||
writeHash string | ||
|
||
checkHash string | ||
shouldFail bool | ||
}{ | ||
{ | ||
name: "static_hash_good", | ||
writeHash: mockContentHash, | ||
checkHash: mockContentHash, | ||
}, | ||
{ | ||
name: "static_hash_bad", | ||
writeHash: diffContentHash, | ||
checkHash: mockContentHash, | ||
shouldFail: true, | ||
}, | ||
{ | ||
name: "dynamic_hash_good", | ||
content: diffContent, | ||
checkHash: diffContentHash, | ||
}, | ||
{ | ||
name: "dynamic_hash_bad", | ||
content: mockContent, | ||
checkHash: diffContentHash, | ||
shouldFail: true, | ||
}, | ||
} | ||
|
||
filename := "test" | ||
for _, c := range cases { | ||
require.NoError(t, removeIfExists(fs, filename)) | ||
require.NoError(t, removeIfExists(fs, filename+hashExt)) | ||
t.Run(c.name, func(t *testing.T) { | ||
require.NoError(t, afero.WriteFile(fs, filename, []byte(c.content), 0o644)) | ||
if c.writeHash != "" { | ||
require.NoError(t, afero.WriteFile(fs, filename+hashExt, []byte(c.writeHash), 0o644)) | ||
} | ||
hashOK, err := CheckHash(filename, c.checkHash) | ||
assert.NoError(t, err) | ||
assert.Equal(t, !c.shouldFail, hashOK) | ||
}) | ||
} | ||
} | ||
|
||
func removeIfExists(fs afero.Fs, filename string) error { | ||
if err := fs.Remove(filename); err != nil && !os.IsNotExist(err) { | ||
return err | ||
} | ||
return nil | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should prefer MD5 here, the hash is used for integrity check and not for tamper protection and the faster MD5 algorithm should be enough.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was aiming for hashes not to need computing at runtime at all (#94 (comment)), the performance difference is negligible, and some people like to run automated scanners looking for usage of bad hash algorithms, but I'm fine with MD5 if you prefer.