From 179af512eaad61c69d9150b052182620dea8f941 Mon Sep 17 00:00:00 2001 From: Klaus Post <klauspost@gmail.com> Date: Sun, 5 Jan 2025 20:49:22 +0100 Subject: [PATCH 1/9] Add unsafe little endian loaders Benchmarks pending --- .github/workflows/go.yml | 48 ++++++++++++++----------- flate/fast_encoder.go | 7 ++-- flate/fuzz_test.go | 1 - internal/le/le.go | 5 +++ internal/le/unsafe_disabled.go | 27 ++++++++++++++ internal/le/unsafe_enabled.go | 37 ++++++++++++++++++++ s2/encode_all.go | 6 ++-- zstd/_generate/gen.go | 4 +-- zstd/bitreader.go | 37 ++++++++++---------- zstd/decoder.go | 1 + zstd/matchlen_generic.go | 5 +-- zstd/seqdec.go | 2 +- zstd/seqdec_amd64.s | 64 +++++++++++++++++----------------- zstd/seqdec_generic.go | 2 +- zstd/zstd.go | 7 ++-- 15 files changed, 166 insertions(+), 87 deletions(-) create mode 100644 internal/le/le.go create mode 100644 internal/le/unsafe_disabled.go create mode 100644 internal/le/unsafe_enabled.go diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index ea71ec2f28..3b04386f22 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -35,15 +35,21 @@ jobs: - name: Test Noasm run: go test -tags=noasm ./... + - name: Test Nounsafe + run: go test -tags=nounsafe ./... + + - name: Test Nounsafe, noasm + run: go test -tags=nounsafe,noasm ./... + - name: Test Race 1 CPU env: CGO_ENABLED: 1 - run: go test -cpu=1 -short -race -v ./... + run: go test -cpu=1 -short -race -tags=nounsafe -v ./... - name: Test Race 4 CPU env: CGO_ENABLED: 1 - run: go test -cpu=4 -short -race -v ./... + run: go test -cpu=4 -short -race -tags=nounsafe -v ./... generate: strategy: @@ -122,22 +128,22 @@ jobs: uses: actions/checkout@v4 - name: S2/FuzzDictBlocks - run: go test -run=none -fuzz=FuzzDictBlocks -fuzztime=100000x -test.fuzzminimizetime=10ms ./s2/. + run: go test -tags=nounsafe -run=none -fuzz=FuzzDictBlocks -fuzztime=100000x -test.fuzzminimizetime=10ms ./s2/. 
- name: S2/FuzzEncodingBlocks - run: go test -run=none -fuzz=FuzzEncodingBlocks -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/. + run: go test -tags=nounsafe -run=none -fuzz=FuzzEncodingBlocks -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/. - name: S2/FuzzLZ4Block - run: go test -run=none -fuzz=FuzzLZ4Block -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/. + run: go test -tags=nounsafe -run=none -fuzz=FuzzLZ4Block -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/. - name: S2/FuzzDictBlocks/noasm - run: go test -tags=noasm -run=none -fuzz=FuzzDictBlocks -fuzztime=100000x -test.fuzzminimizetime=10ms ./s2/. + run: go test -tags=noasm,nounsafe -run=none -fuzz=FuzzDictBlocks -fuzztime=100000x -test.fuzzminimizetime=10ms ./s2/. - name: S2/FuzzEncodingBlocks/noasm - run: go test -tags=noasm -run=none -fuzz=FuzzEncodingBlocks -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/. + run: go test -tags=noasm,nounsafe -run=none -fuzz=FuzzEncodingBlocks -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/. - name: S2/FuzzLZ4Block/noasm - run: go test -tags=noasm -run=none -fuzz=FuzzLZ4Block -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/. + run: go test -tags=noasm,nounsafe -run=none -fuzz=FuzzLZ4Block -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/. fuzz-zstd: env: @@ -153,28 +159,28 @@ jobs: uses: actions/checkout@v4 - name: zstd/FuzzDecodeAll - run: go test -run=none -fuzz=FuzzDecodeAll -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/. + run: go test -tags=nounsafe -run=none -fuzz=FuzzDecodeAll -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/. - name: zstd/FuzzDecAllNoBMI2 - run: go test -run=none -fuzz=FuzzDecAllNoBMI2 -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/. + run: go test -tags=nounsafe -run=none -fuzz=FuzzDecAllNoBMI2 -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/. - name: zstd/FuzzDecoder - run: go test -run=none -fuzz=FuzzDecoder -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/. 
+ run: go test -tags=nounsafe -run=none -fuzz=FuzzDecoder -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/. - name: zstd/FuzzNoBMI2Dec - run: go test -run=none -fuzz=FuzzNoBMI2Dec -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/. + run: go test -tags=nounsafe -run=none -fuzz=FuzzNoBMI2Dec -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/. - name: zstd/FuzzEncoding - run: cd zstd&&go test -run=none -fuzz=FuzzEncoding -fuzztime=250000x -test.fuzzminimizetime=10ms -fuzz-end=3&&cd .. + run: cd zstd&&go test -tags=nounsafe -run=none -fuzz=FuzzEncoding -fuzztime=250000x -test.fuzzminimizetime=10ms -fuzz-end=3&&cd .. - name: zstd/FuzzDecodeAll/noasm - run: go test -tags=noasm -run=none -fuzz=FuzzDecodeAll -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/. + run: go test -tags=noasm,nounsafe -run=none -fuzz=FuzzDecodeAll -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/. - name: zstd/FuzzDecoder/noasm - run: go test -tags=noasm -run=none -fuzz=FuzzDecoder -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/. + run: go test -tags=noasm,nounsafe -run=none -fuzz=FuzzDecoder -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/. - name: zstd/FuzzEncoding/noasm - run: cd zstd&&go test -tags=noasm -run=none -fuzz=FuzzEncoding -fuzztime=250000x -test.fuzzminimizetime=10ms -fuzz-end=3&&cd .. + run: cd zstd&&go test -tags=noasm,nounsafe -run=none -fuzz=FuzzEncoding -fuzztime=250000x -test.fuzzminimizetime=10ms -fuzz-end=3&&cd .. - name: zstd/FuzzEncodingBest run: cd zstd&&go test -run=none -fuzz=FuzzEncoding -fuzztime=25000x -test.fuzzminimizetime=10ms -fuzz-start=4&&cd .. @@ -193,16 +199,16 @@ jobs: uses: actions/checkout@v4 - name: flate/FuzzEncoding - run: go test -run=none -fuzz=FuzzEncoding -fuzztime=100000x -test.fuzzminimizetime=10ms ./flate/. + run: go test -tags=nounsafe -run=none -fuzz=FuzzEncoding -fuzztime=100000x -test.fuzzminimizetime=10ms ./flate/. 
- name: flate/FuzzEncoding/noasm - run: go test -run=none -tags=noasm -fuzz=FuzzEncoding -fuzztime=100000x -test.fuzzminimizetime=10ms ./flate/. + run: go test -run=none -tags=noasm,nounsafe -fuzz=FuzzEncoding -fuzztime=100000x -test.fuzzminimizetime=10ms ./flate/. - name: zip/FuzzReader - run: go test -run=none -fuzz=FuzzReader -fuzztime=500000x -test.fuzzminimizetime=10ms ./zip/. + run: go test -tags=nounsafe -run=none -fuzz=FuzzReader -fuzztime=500000x -test.fuzzminimizetime=10ms ./zip/. - name: fse/FuzzCompress - run: go test -run=none -fuzz=FuzzCompress -fuzztime=1000000x -test.fuzzminimizetime=10ms ./fse/. + run: go test -tags=nounsafe -run=none -fuzz=FuzzCompress -fuzztime=1000000x -test.fuzzminimizetime=10ms ./fse/. - name: fse/FuzzDecompress - run: go test -run=none -fuzz=FuzzDecompress -fuzztime=1000000x -test.fuzzminimizetime=10ms ./fse/. \ No newline at end of file + run: go test -tags=nounsafe -run=none -fuzz=FuzzDecompress -fuzztime=1000000x -test.fuzzminimizetime=10ms ./fse/. 
diff --git a/flate/fast_encoder.go b/flate/fast_encoder.go index c8124b5c49..433977767b 100644 --- a/flate/fast_encoder.go +++ b/flate/fast_encoder.go @@ -6,8 +6,9 @@ package flate import ( - "encoding/binary" "fmt" + + "github.com/klauspost/compress/internal/le" ) type fastEnc interface { @@ -58,11 +59,11 @@ const ( ) func load3232(b []byte, i int32) uint32 { - return binary.LittleEndian.Uint32(b[i:]) + return le.Load32(b, i) } func load6432(b []byte, i int32) uint64 { - return binary.LittleEndian.Uint64(b[i:]) + return le.Load64(b, i) } type tableEntry struct { diff --git a/flate/fuzz_test.go b/flate/fuzz_test.go index 5529a78be0..8b02f460cf 100644 --- a/flate/fuzz_test.go +++ b/flate/fuzz_test.go @@ -1,5 +1,4 @@ //go:build go1.18 -// +build go1.18 package flate diff --git a/internal/le/le.go b/internal/le/le.go new file mode 100644 index 0000000000..e54909e16f --- /dev/null +++ b/internal/le/le.go @@ -0,0 +1,5 @@ +package le + +type Indexer interface { + int | int8 | int16 | int32 | int64 | uint | uint8 | uint16 | uint32 | uint64 +} diff --git a/internal/le/unsafe_disabled.go b/internal/le/unsafe_disabled.go new file mode 100644 index 0000000000..f9d81b17c1 --- /dev/null +++ b/internal/le/unsafe_disabled.go @@ -0,0 +1,27 @@ +//go:build !(amd64 || arm64 || ppc64le || riscv64) || nounsafe || purego || appengine + +package le + +import ( + "encoding/binary" +) + +func Load16[I Indexer](b []byte, i I) uint16 { + return binary.LittleEndian.Uint16(b[i:]) +} + +func Load32[I Indexer](b []byte, i I) uint32 { + return binary.LittleEndian.Uint32(b[i:]) +} + +func Load64[I Indexer](b []byte, i I) uint64 { + return binary.LittleEndian.Uint64(b[i:]) +} + +func Store16(b []byte, v uint16) { + binary.LittleEndian.PutUint16(b, v) +} + +func Store32(b []byte, v uint32) { + binary.LittleEndian.PutUint32(b, v) +} diff --git a/internal/le/unsafe_enabled.go b/internal/le/unsafe_enabled.go new file mode 100644 index 0000000000..18f3c4d102 --- /dev/null +++ 
b/internal/le/unsafe_enabled.go @@ -0,0 +1,37 @@ +// We enable 64 bit LE platforms: + +//go:build (amd64 || arm64 || ppc64le || riscv64) && !nounsafe && !purego && !appengine + +package le + +import ( + "unsafe" +) + +func Load16[I Indexer](b []byte, i I) uint16 { + //return binary.LittleEndian.Uint16(b[i:]) + //return *(*uint16)(unsafe.Pointer(&b[i])) + return *(*uint16)(unsafe.Pointer(uintptr(unsafe.Pointer(&b[0])) + uintptr(i)*unsafe.Sizeof(b[0]))) +} + +func Load32[I Indexer](b []byte, i I) uint32 { + //return binary.LittleEndian.Uint32(b[i:]) + //return *(*uint32)(unsafe.Pointer(&b[i])) + return *(*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(&b[0])) + uintptr(i)*unsafe.Sizeof(b[0]))) +} + +func Load64[I Indexer](b []byte, i I) uint64 { + //return binary.LittleEndian.Uint64(b[i:]) + //return *(*uint64)(unsafe.Pointer(&b[i])) + return *(*uint64)(unsafe.Pointer(uintptr(unsafe.Pointer(&b[0])) + uintptr(i)*unsafe.Sizeof(b[0]))) +} + +func Store16(b []byte, v uint16) { + //binary.LittleEndian.PutUint16(b, v) + *(*uint16)(unsafe.Pointer(&b[0])) = v +} + +func Store32(b []byte, v uint32) { + //binary.LittleEndian.PutUint32(b, v) + *(*uint32)(unsafe.Pointer(&b[0])) = v +} diff --git a/s2/encode_all.go b/s2/encode_all.go index 9977045696..c56ce52e7d 100644 --- a/s2/encode_all.go +++ b/s2/encode_all.go @@ -10,14 +10,16 @@ import ( "encoding/binary" "fmt" "math/bits" + + "github.com/klauspost/compress/internal/le" ) func load32(b []byte, i int) uint32 { - return binary.LittleEndian.Uint32(b[i:]) + return le.Load32(b, i) } func load64(b []byte, i int) uint64 { - return binary.LittleEndian.Uint64(b[i:]) + return le.Load64(b, i) } // hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits. 
diff --git a/zstd/_generate/gen.go b/zstd/_generate/gen.go index 4aa9ffdde7..c872cb1053 100644 --- a/zstd/_generate/gen.go +++ b/zstd/_generate/gen.go @@ -157,7 +157,7 @@ func (o options) generateBody(name string, executeSingleTriple func(ctx *execute Load(br.Field("value"), brValue) Load(br.Field("bitsRead"), brBitsRead) Load(br.Field("in").Base(), brPointer) - Load(br.Field("in").Len(), brOffset) + Load(br.Field("off"), brOffset) ADDQ(brOffset, brPointer) // Add current offset to read pointer. MOVQ(brPointer, brPointerStash) } @@ -438,7 +438,7 @@ func (o options) generateBody(name string, executeSingleTriple func(ctx *execute br := Dereference(Param("br")) Store(brValue, br.Field("value")) Store(brBitsRead.As8(), br.Field("bitsRead")) - Store(brOffset, br.Field("in").Len()) + Store(brOffset, br.Field("off")) if !o.useSeqs { Comment("Update the context") diff --git a/zstd/bitreader.go b/zstd/bitreader.go index 25ca983941..34e285fb73 100644 --- a/zstd/bitreader.go +++ b/zstd/bitreader.go @@ -5,11 +5,12 @@ package zstd import ( - "encoding/binary" "errors" "fmt" "io" "math/bits" + + "github.com/klauspost/compress/internal/le" ) // bitReader reads a bitstream in reverse. @@ -18,6 +19,7 @@ import ( type bitReader struct { in []byte value uint64 // Maybe use [16]byte, but shifting is awkward. 
+ off int // offset where next read should end bitsRead uint8 } @@ -32,6 +34,7 @@ func (b *bitReader) init(in []byte) error { if v == 0 { return errors.New("corrupt stream, did not find end of stream") } + b.off = len(in) b.bitsRead = 64 b.value = 0 if len(in) >= 8 { @@ -67,18 +70,15 @@ func (b *bitReader) fillFast() { if b.bitsRead < 32 { return } - v := b.in[len(b.in)-4:] - b.in = b.in[:len(b.in)-4] - low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - b.value = (b.value << 32) | uint64(low) + b.off -= 4 + b.value = (b.value << 32) | uint64(le.Load32(b.in, b.off)) b.bitsRead -= 32 } // fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read. func (b *bitReader) fillFastStart() { - v := b.in[len(b.in)-8:] - b.in = b.in[:len(b.in)-8] - b.value = binary.LittleEndian.Uint64(v) + b.off -= 8 + b.value = le.Load64(b.in, b.off) b.bitsRead = 0 } @@ -87,25 +87,23 @@ func (b *bitReader) fill() { if b.bitsRead < 32 { return } - if len(b.in) >= 4 { - v := b.in[len(b.in)-4:] - b.in = b.in[:len(b.in)-4] - low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - b.value = (b.value << 32) | uint64(low) + if b.off >= 4 { + b.off -= 4 + b.value = (b.value << 32) | uint64(le.Load32(b.in, b.off)) b.bitsRead -= 32 return } - b.bitsRead -= uint8(8 * len(b.in)) - for len(b.in) > 0 { - b.value = (b.value << 8) | uint64(b.in[len(b.in)-1]) - b.in = b.in[:len(b.in)-1] + b.bitsRead -= uint8(8 * b.off) + for b.off > 0 { + b.off -= 1 + b.value = (b.value << 8) | uint64(b.in[b.off]) } } // finished returns true if all bits have been read from the bit stream. func (b *bitReader) finished() bool { - return len(b.in) == 0 && b.bitsRead >= 64 + return b.off == 0 && b.bitsRead >= 64 } // overread returns true if more bits have been requested than is on the stream. @@ -115,13 +113,14 @@ func (b *bitReader) overread() bool { // remain returns the number of bits remaining. 
func (b *bitReader) remain() uint { - return 8*uint(len(b.in)) + 64 - uint(b.bitsRead) + return 8*uint(b.off) + 64 - uint(b.bitsRead) } // close the bitstream and returns an error if out-of-buffer reads occurred. func (b *bitReader) close() error { // Release reference. b.in = nil + b.off = 0 if !b.finished() { return fmt.Errorf("%d extra bits on block, should be 0", b.remain()) } diff --git a/zstd/decoder.go b/zstd/decoder.go index 0170da828c..ffdb889f7f 100644 --- a/zstd/decoder.go +++ b/zstd/decoder.go @@ -323,6 +323,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { frame.bBuf = nil if frame.history.decoders.br != nil { frame.history.decoders.br.in = nil + frame.history.decoders.br.off = 0 } d.decoders <- block }() diff --git a/zstd/matchlen_generic.go b/zstd/matchlen_generic.go index 57b9c31c02..741e784491 100644 --- a/zstd/matchlen_generic.go +++ b/zstd/matchlen_generic.go @@ -7,15 +7,16 @@ package zstd import ( - "encoding/binary" "math/bits" + + "github.com/klauspost/compress/internal/le" ) // matchLen returns the maximum common prefix length of a and b. // a must be the shortest of the two. 
func matchLen(a, b []byte) (n int) { for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] { - diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b) + diff := le.Load64(a, 0) ^ le.Load64(b, 0) if diff != 0 { return n + bits.TrailingZeros64(diff)>>3 } diff --git a/zstd/seqdec.go b/zstd/seqdec.go index d7fe6d82d9..1d69e6d72f 100644 --- a/zstd/seqdec.go +++ b/zstd/seqdec.go @@ -245,7 +245,7 @@ func (s *sequenceDecs) decodeSync(hist []byte) error { return io.ErrUnexpectedEOF } var ll, mo, ml int - if len(br.in) > 4+((maxOffsetBits+16+16)>>3) { + if br.off > 4+((maxOffsetBits+16+16)>>3) { // inlined function: // ll, mo, ml = s.nextFast(br, llState, mlState, ofState) diff --git a/zstd/seqdec_amd64.s b/zstd/seqdec_amd64.s index f5591fa1e8..a708ca6d3d 100644 --- a/zstd/seqdec_amd64.s +++ b/zstd/seqdec_amd64.s @@ -7,9 +7,9 @@ TEXT ·sequenceDecs_decode_amd64(SB), $8-32 MOVQ br+8(FP), CX MOVQ 24(CX), DX - MOVBQZX 32(CX), BX + MOVBQZX 40(CX), BX MOVQ (CX), AX - MOVQ 8(CX), SI + MOVQ 32(CX), SI ADDQ SI, AX MOVQ AX, (SP) MOVQ ctx+16(FP), AX @@ -299,8 +299,8 @@ sequenceDecs_decode_amd64_match_len_ofs_ok: MOVQ R13, 160(AX) MOVQ br+8(FP), AX MOVQ DX, 24(AX) - MOVB BL, 32(AX) - MOVQ SI, 8(AX) + MOVB BL, 40(AX) + MOVQ SI, 32(AX) // Return success MOVQ $0x00000000, ret+24(FP) @@ -335,9 +335,9 @@ error_overread: TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32 MOVQ br+8(FP), CX MOVQ 24(CX), DX - MOVBQZX 32(CX), BX + MOVBQZX 40(CX), BX MOVQ (CX), AX - MOVQ 8(CX), SI + MOVQ 32(CX), SI ADDQ SI, AX MOVQ AX, (SP) MOVQ ctx+16(FP), AX @@ -598,8 +598,8 @@ sequenceDecs_decode_56_amd64_match_len_ofs_ok: MOVQ R13, 160(AX) MOVQ br+8(FP), AX MOVQ DX, 24(AX) - MOVB BL, 32(AX) - MOVQ SI, 8(AX) + MOVB BL, 40(AX) + MOVQ SI, 32(AX) // Return success MOVQ $0x00000000, ret+24(FP) @@ -634,9 +634,9 @@ error_overread: TEXT ·sequenceDecs_decode_bmi2(SB), $8-32 MOVQ br+8(FP), BX MOVQ 24(BX), AX - MOVBQZX 32(BX), DX + MOVBQZX 40(BX), DX MOVQ (BX), CX - MOVQ 8(BX), BX + MOVQ 32(BX), BX ADDQ BX, CX 
MOVQ CX, (SP) MOVQ ctx+16(FP), CX @@ -884,8 +884,8 @@ sequenceDecs_decode_bmi2_match_len_ofs_ok: MOVQ R12, 160(CX) MOVQ br+8(FP), CX MOVQ AX, 24(CX) - MOVB DL, 32(CX) - MOVQ BX, 8(CX) + MOVB DL, 40(CX) + MOVQ BX, 32(CX) // Return success MOVQ $0x00000000, ret+24(FP) @@ -920,9 +920,9 @@ error_overread: TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32 MOVQ br+8(FP), BX MOVQ 24(BX), AX - MOVBQZX 32(BX), DX + MOVBQZX 40(BX), DX MOVQ (BX), CX - MOVQ 8(BX), BX + MOVQ 32(BX), BX ADDQ BX, CX MOVQ CX, (SP) MOVQ ctx+16(FP), CX @@ -1141,8 +1141,8 @@ sequenceDecs_decode_56_bmi2_match_len_ofs_ok: MOVQ R12, 160(CX) MOVQ br+8(FP), CX MOVQ AX, 24(CX) - MOVB DL, 32(CX) - MOVQ BX, 8(CX) + MOVB DL, 40(CX) + MOVQ BX, 32(CX) // Return success MOVQ $0x00000000, ret+24(FP) @@ -1787,9 +1787,9 @@ empty_seqs: TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32 MOVQ br+8(FP), CX MOVQ 24(CX), DX - MOVBQZX 32(CX), BX + MOVBQZX 40(CX), BX MOVQ (CX), AX - MOVQ 8(CX), SI + MOVQ 32(CX), SI ADDQ SI, AX MOVQ AX, (SP) MOVQ ctx+16(FP), AX @@ -2281,8 +2281,8 @@ handle_loop: loop_finished: MOVQ br+8(FP), AX MOVQ DX, 24(AX) - MOVB BL, 32(AX) - MOVQ SI, 8(AX) + MOVB BL, 40(AX) + MOVQ SI, 32(AX) // Update the context MOVQ ctx+16(FP), AX @@ -2349,9 +2349,9 @@ error_not_enough_space: TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32 MOVQ br+8(FP), BX MOVQ 24(BX), AX - MOVBQZX 32(BX), DX + MOVBQZX 40(BX), DX MOVQ (BX), CX - MOVQ 8(BX), BX + MOVQ 32(BX), BX ADDQ BX, CX MOVQ CX, (SP) MOVQ ctx+16(FP), CX @@ -2801,8 +2801,8 @@ handle_loop: loop_finished: MOVQ br+8(FP), CX MOVQ AX, 24(CX) - MOVB DL, 32(CX) - MOVQ BX, 8(CX) + MOVB DL, 40(CX) + MOVQ BX, 32(CX) // Update the context MOVQ ctx+16(FP), AX @@ -2869,9 +2869,9 @@ error_not_enough_space: TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32 MOVQ br+8(FP), CX MOVQ 24(CX), DX - MOVBQZX 32(CX), BX + MOVBQZX 40(CX), BX MOVQ (CX), AX - MOVQ 8(CX), SI + MOVQ 32(CX), SI ADDQ SI, AX MOVQ AX, (SP) MOVQ ctx+16(FP), AX @@ -3465,8 +3465,8 @@ handle_loop: loop_finished: MOVQ 
br+8(FP), AX MOVQ DX, 24(AX) - MOVB BL, 32(AX) - MOVQ SI, 8(AX) + MOVB BL, 40(AX) + MOVQ SI, 32(AX) // Update the context MOVQ ctx+16(FP), AX @@ -3533,9 +3533,9 @@ error_not_enough_space: TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32 MOVQ br+8(FP), BX MOVQ 24(BX), AX - MOVBQZX 32(BX), DX + MOVBQZX 40(BX), DX MOVQ (BX), CX - MOVQ 8(BX), BX + MOVQ 32(BX), BX ADDQ BX, CX MOVQ CX, (SP) MOVQ ctx+16(FP), CX @@ -4087,8 +4087,8 @@ handle_loop: loop_finished: MOVQ br+8(FP), CX MOVQ AX, 24(CX) - MOVB DL, 32(CX) - MOVQ BX, 8(CX) + MOVB DL, 40(CX) + MOVQ BX, 32(CX) // Update the context MOVQ ctx+16(FP), AX diff --git a/zstd/seqdec_generic.go b/zstd/seqdec_generic.go index 2fb35b788c..ac2a80d291 100644 --- a/zstd/seqdec_generic.go +++ b/zstd/seqdec_generic.go @@ -29,7 +29,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error { } for i := range seqs { var ll, mo, ml int - if len(br.in) > 4+((maxOffsetBits+16+16)>>3) { + if br.off > 4+((maxOffsetBits+16+16)>>3) { // inlined function: // ll, mo, ml = s.nextFast(br, llState, mlState, ofState) diff --git a/zstd/zstd.go b/zstd/zstd.go index 066bef2a4f..6252b46ae6 100644 --- a/zstd/zstd.go +++ b/zstd/zstd.go @@ -5,10 +5,11 @@ package zstd import ( "bytes" - "encoding/binary" "errors" "log" "math" + + "github.com/klauspost/compress/internal/le" ) // enable debug printing @@ -110,11 +111,11 @@ func printf(format string, a ...interface{}) { } func load3232(b []byte, i int32) uint32 { - return binary.LittleEndian.Uint32(b[:len(b):len(b)][i:]) + return le.Load32(b, i) } func load6432(b []byte, i int32) uint64 { - return binary.LittleEndian.Uint64(b[:len(b):len(b)][i:]) + return le.Load64(b, i) } type byter interface { From fef4be656d453b33f0e3e401d4e677ab2d95c56a Mon Sep 17 00:00:00 2001 From: Klaus Post <klauspost@gmail.com> Date: Sun, 5 Jan 2025 21:08:12 +0100 Subject: [PATCH 2/9] Try new test matrix --- .github/workflows/go.yml | 89 +++++++++++++++++----------------------- zstd/bitreader.go | 32 +++++++-------- zstd/decoder.go 
| 2 +- zstd/seqdec.go | 2 +- zstd/seqdec_generic.go | 2 +- 5 files changed, 56 insertions(+), 71 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 3b04386f22..02b1285b43 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -32,14 +32,14 @@ jobs: - name: Test run: go test ./... - - name: Test Noasm + - name: Test No-asm run: go test -tags=noasm ./... - - name: Test Nounsafe + - name: Test No-unsafe run: go test -tags=nounsafe ./... - - name: Test Nounsafe, noasm - run: go test -tags=nounsafe,noasm ./... + - name: Test No-unsafe, noasm + run: go test -tags="nounsafe,noasm" ./... - name: Test Race 1 CPU env: @@ -118,6 +118,9 @@ jobs: env: CGO_ENABLED: 0 runs-on: ubuntu-latest + strategy: + matrix: + tags: [ 'nounsafe', '"noasm,nounsafe"' ] steps: - name: Set up Go uses: actions/setup-go@v5.2.0 @@ -127,28 +130,23 @@ jobs: - name: Checkout code uses: actions/checkout@v4 - - name: S2/FuzzDictBlocks - run: go test -tags=nounsafe -run=none -fuzz=FuzzDictBlocks -fuzztime=100000x -test.fuzzminimizetime=10ms ./s2/. - - - name: S2/FuzzEncodingBlocks - run: go test -tags=nounsafe -run=none -fuzz=FuzzEncodingBlocks -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/. - - - name: S2/FuzzLZ4Block - run: go test -tags=nounsafe -run=none -fuzz=FuzzLZ4Block -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/. + - name: S2/FuzzDictBlocks/${{ matrix.tags }} + run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzDictBlocks -fuzztime=100000x -test.fuzzminimizetime=10ms ./s2/. - - name: S2/FuzzDictBlocks/noasm - run: go test -tags=noasm,nounsafe -run=none -fuzz=FuzzDictBlocks -fuzztime=100000x -test.fuzzminimizetime=10ms ./s2/. + - name: S2/FuzzEncodingBlocks/${{ matrix.tags }} + run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzEncodingBlocks -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/. 
- - name: S2/FuzzEncodingBlocks/noasm - run: go test -tags=noasm,nounsafe -run=none -fuzz=FuzzEncodingBlocks -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/. + - name: S2/FuzzLZ4Block/${{ matrix.tags }} + run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzLZ4Block -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/. - - name: S2/FuzzLZ4Block/noasm - run: go test -tags=noasm,nounsafe -run=none -fuzz=FuzzLZ4Block -fuzztime=500000x -test.fuzzminimizetime=10ms ./s2/. fuzz-zstd: env: CGO_ENABLED: 0 runs-on: ubuntu-latest + strategy: + matrix: + tags: [ 'nounsafe', '"noasm,nounsafe"' ] steps: - name: Set up Go uses: actions/setup-go@v5.2.0 @@ -158,57 +156,44 @@ jobs: - name: Checkout code uses: actions/checkout@v4 - - name: zstd/FuzzDecodeAll - run: go test -tags=nounsafe -run=none -fuzz=FuzzDecodeAll -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/. - - - name: zstd/FuzzDecAllNoBMI2 - run: go test -tags=nounsafe -run=none -fuzz=FuzzDecAllNoBMI2 -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/. + - name: zstd/FuzzDecodeAll/${{ matrix.tags }} + run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzDecodeAll -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/. - - name: zstd/FuzzDecoder - run: go test -tags=nounsafe -run=none -fuzz=FuzzDecoder -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/. + - name: zstd/FuzzDecAllNoBMI2/${{ matrix.tags }} + run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzDecAllNoBMI2 -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/. - - name: zstd/FuzzNoBMI2Dec - run: go test -tags=nounsafe -run=none -fuzz=FuzzNoBMI2Dec -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/. + - name: zstd/FuzzDecoder/${{ matrix.tags }} + run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzDecoder -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/. - - name: zstd/FuzzEncoding - run: cd zstd&&go test -tags=nounsafe -run=none -fuzz=FuzzEncoding -fuzztime=250000x -test.fuzzminimizetime=10ms -fuzz-end=3&&cd .. 
+ - name: zstd/FuzzNoBMI2Dec/${{ matrix.tags }} + run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzNoBMI2Dec -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/. - - name: zstd/FuzzDecodeAll/noasm - run: go test -tags=noasm,nounsafe -run=none -fuzz=FuzzDecodeAll -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/. - - - name: zstd/FuzzDecoder/noasm - run: go test -tags=noasm,nounsafe -run=none -fuzz=FuzzDecoder -fuzztime=500000x -test.fuzzminimizetime=10ms ./zstd/. - - - name: zstd/FuzzEncoding/noasm - run: cd zstd&&go test -tags=noasm,nounsafe -run=none -fuzz=FuzzEncoding -fuzztime=250000x -test.fuzzminimizetime=10ms -fuzz-end=3&&cd .. - - - name: zstd/FuzzEncodingBest - run: cd zstd&&go test -run=none -fuzz=FuzzEncoding -fuzztime=25000x -test.fuzzminimizetime=10ms -fuzz-start=4&&cd .. + - name: zstd/FuzzEncoding/${{ matrix.tags }} + run: cd zstd&&go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzEncoding -fuzztime=250000x -test.fuzzminimizetime=10ms -fuzz-end=3&&cd .. fuzz-other: env: CGO_ENABLED: 0 runs-on: ubuntu-latest + strategy: + matrix: + tags: [ 'nounsafe', '"noasm,nounsafe"' ] steps: - name: Set up Go uses: actions/setup-go@v5.2.0 with: go-version: 1.23.x - - name: Checkout code uses: actions/checkout@v4 - - name: flate/FuzzEncoding - run: go test -tags=nounsafe -run=none -fuzz=FuzzEncoding -fuzztime=100000x -test.fuzzminimizetime=10ms ./flate/. - - - name: flate/FuzzEncoding/noasm - run: go test -run=none -tags=noasm,nounsafe -fuzz=FuzzEncoding -fuzztime=100000x -test.fuzzminimizetime=10ms ./flate/. + - name: flate/FuzzEncoding/${{ matrix.tags }} + run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzEncoding -fuzztime=100000x -test.fuzzminimizetime=10ms ./flate/. - - name: zip/FuzzReader - run: go test -tags=nounsafe -run=none -fuzz=FuzzReader -fuzztime=500000x -test.fuzzminimizetime=10ms ./zip/. 
+ - name: zip/FuzzReader/${{ matrix.tags }} + run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzReader -fuzztime=500000x -test.fuzzminimizetime=10ms ./zip/. - - name: fse/FuzzCompress - run: go test -tags=nounsafe -run=none -fuzz=FuzzCompress -fuzztime=1000000x -test.fuzzminimizetime=10ms ./fse/. + - name: fse/FuzzCompress/${{ matrix.tags }} + run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzCompress -fuzztime=1000000x -test.fuzzminimizetime=10ms ./fse/. - - name: fse/FuzzDecompress - run: go test -tags=nounsafe -run=none -fuzz=FuzzDecompress -fuzztime=1000000x -test.fuzzminimizetime=10ms ./fse/. + - name: fse/FuzzDecompress/${{ matrix.tags }} + run: go test -tags=${{ matrix.tags }} -run=none -fuzz=FuzzDecompress -fuzztime=1000000x -test.fuzzminimizetime=10ms ./fse/. diff --git a/zstd/bitreader.go b/zstd/bitreader.go index 34e285fb73..d41e3e1709 100644 --- a/zstd/bitreader.go +++ b/zstd/bitreader.go @@ -19,7 +19,7 @@ import ( type bitReader struct { in []byte value uint64 // Maybe use [16]byte, but shifting is awkward. - off int // offset where next read should end + cursor int // offset where next read should end bitsRead uint8 } @@ -34,7 +34,7 @@ func (b *bitReader) init(in []byte) error { if v == 0 { return errors.New("corrupt stream, did not find end of stream") } - b.off = len(in) + b.cursor = len(in) b.bitsRead = 64 b.value = 0 if len(in) >= 8 { @@ -70,15 +70,15 @@ func (b *bitReader) fillFast() { if b.bitsRead < 32 { return } - b.off -= 4 - b.value = (b.value << 32) | uint64(le.Load32(b.in, b.off)) + b.cursor -= 4 + b.value = (b.value << 32) | uint64(le.Load32(b.in, b.cursor)) b.bitsRead -= 32 } // fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read. 
func (b *bitReader) fillFastStart() { - b.off -= 8 - b.value = le.Load64(b.in, b.off) + b.cursor -= 8 + b.value = le.Load64(b.in, b.cursor) b.bitsRead = 0 } @@ -87,23 +87,23 @@ func (b *bitReader) fill() { if b.bitsRead < 32 { return } - if b.off >= 4 { - b.off -= 4 - b.value = (b.value << 32) | uint64(le.Load32(b.in, b.off)) + if b.cursor >= 4 { + b.cursor -= 4 + b.value = (b.value << 32) | uint64(le.Load32(b.in, b.cursor)) b.bitsRead -= 32 return } - b.bitsRead -= uint8(8 * b.off) - for b.off > 0 { - b.off -= 1 - b.value = (b.value << 8) | uint64(b.in[b.off]) + b.bitsRead -= uint8(8 * b.cursor) + for b.cursor > 0 { + b.cursor -= 1 + b.value = (b.value << 8) | uint64(b.in[b.cursor]) } } // finished returns true if all bits have been read from the bit stream. func (b *bitReader) finished() bool { - return b.off == 0 && b.bitsRead >= 64 + return b.cursor == 0 && b.bitsRead >= 64 } // overread returns true if more bits have been requested than is on the stream. @@ -113,14 +113,14 @@ func (b *bitReader) overread() bool { // remain returns the number of bits remaining. func (b *bitReader) remain() uint { - return 8*uint(b.off) + 64 - uint(b.bitsRead) + return 8*uint(b.cursor) + 64 - uint(b.bitsRead) } // close the bitstream and returns an error if out-of-buffer reads occurred. func (b *bitReader) close() error { // Release reference. 
b.in = nil - b.off = 0 + b.cursor = 0 if !b.finished() { return fmt.Errorf("%d extra bits on block, should be 0", b.remain()) } diff --git a/zstd/decoder.go b/zstd/decoder.go index ffdb889f7f..ea2a19376c 100644 --- a/zstd/decoder.go +++ b/zstd/decoder.go @@ -323,7 +323,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { frame.bBuf = nil if frame.history.decoders.br != nil { frame.history.decoders.br.in = nil - frame.history.decoders.br.off = 0 + frame.history.decoders.br.cursor = 0 } d.decoders <- block }() diff --git a/zstd/seqdec.go b/zstd/seqdec.go index 1d69e6d72f..9a7de82f9e 100644 --- a/zstd/seqdec.go +++ b/zstd/seqdec.go @@ -245,7 +245,7 @@ func (s *sequenceDecs) decodeSync(hist []byte) error { return io.ErrUnexpectedEOF } var ll, mo, ml int - if br.off > 4+((maxOffsetBits+16+16)>>3) { + if br.cursor > 4+((maxOffsetBits+16+16)>>3) { // inlined function: // ll, mo, ml = s.nextFast(br, llState, mlState, ofState) diff --git a/zstd/seqdec_generic.go b/zstd/seqdec_generic.go index ac2a80d291..7cec2197cd 100644 --- a/zstd/seqdec_generic.go +++ b/zstd/seqdec_generic.go @@ -29,7 +29,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error { } for i := range seqs { var ll, mo, ml int - if br.off > 4+((maxOffsetBits+16+16)>>3) { + if br.cursor > 4+((maxOffsetBits+16+16)>>3) { // inlined function: // ll, mo, ml = s.nextFast(br, llState, mlState, ofState) From 1471edf4c6716f01721c1c501ad9ac1647e5a7a9 Mon Sep 17 00:00:00 2001 From: Klaus Post <klauspost@gmail.com> Date: Mon, 6 Jan 2025 10:33:52 +0100 Subject: [PATCH 3/9] Fix field name. 
--- zstd/_generate/gen.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zstd/_generate/gen.go b/zstd/_generate/gen.go index c872cb1053..1554543b15 100644 --- a/zstd/_generate/gen.go +++ b/zstd/_generate/gen.go @@ -157,7 +157,7 @@ func (o options) generateBody(name string, executeSingleTriple func(ctx *execute Load(br.Field("value"), brValue) Load(br.Field("bitsRead"), brBitsRead) Load(br.Field("in").Base(), brPointer) - Load(br.Field("off"), brOffset) + Load(br.Field("cursor"), brOffset) ADDQ(brOffset, brPointer) // Add current offset to read pointer. MOVQ(brPointer, brPointerStash) } @@ -438,7 +438,7 @@ func (o options) generateBody(name string, executeSingleTriple func(ctx *execute br := Dereference(Param("br")) Store(brValue, br.Field("value")) Store(brBitsRead.As8(), br.Field("bitsRead")) - Store(brOffset, br.Field("off")) + Store(brOffset, br.Field("cursor")) if !o.useSeqs { Comment("Update the context") From bd6469850d5396bcaf00deb6f3350b6077a761cf Mon Sep 17 00:00:00 2001 From: Klaus Post <klauspost@gmail.com> Date: Sun, 12 Jan 2025 11:47:20 +0100 Subject: [PATCH 4/9] Use in huff0, s2 go decoder. --- huff0/bitreader.go | 23 +++++++++-------------- huff0/decompress_test.go | 8 ++++---- s2/decode_other.go | 26 +++++++++++--------------- zstd/seqenc.go | 2 -- 4 files changed, 24 insertions(+), 35 deletions(-) diff --git a/huff0/bitreader.go b/huff0/bitreader.go index 6686d7371b..bfc7a523de 100644 --- a/huff0/bitreader.go +++ b/huff0/bitreader.go @@ -6,10 +6,11 @@ package huff0 import ( - "encoding/binary" "errors" "fmt" "io" + + "github.com/klauspost/compress/internal/le" ) // bitReader reads a bitstream in reverse. @@ -66,8 +67,7 @@ func (b *bitReaderBytes) fillFast() { } // 2 bounds checks.
- v := b.in[b.off-4 : b.off] - low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + low := le.Load32(b.in, b.off-4) b.value |= uint64(low) << (b.bitsRead - 32) b.bitsRead -= 32 b.off -= 4 @@ -76,7 +76,7 @@ func (b *bitReaderBytes) fillFast() { // fillFastStart() assumes the bitReaderBytes is empty and there is at least 8 bytes to read. func (b *bitReaderBytes) fillFastStart() { // Do single re-slice to avoid bounds checks. - b.value = binary.LittleEndian.Uint64(b.in[b.off-8:]) + b.value = le.Load64(b.in, b.off-8) b.bitsRead = 0 b.off -= 8 } @@ -86,9 +86,8 @@ func (b *bitReaderBytes) fill() { if b.bitsRead < 32 { return } - if b.off > 4 { - v := b.in[b.off-4 : b.off] - low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + if b.off >= 4 { + low := le.Load32(b.in, b.off-4) b.value |= uint64(low) << (b.bitsRead - 32) b.bitsRead -= 32 b.off -= 4 @@ -175,9 +174,7 @@ func (b *bitReaderShifted) fillFast() { return } - // 2 bounds checks. - v := b.in[b.off-4 : b.off] - low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + low := le.Load32(b.in, b.off-4) b.value |= uint64(low) << ((b.bitsRead - 32) & 63) b.bitsRead -= 32 b.off -= 4 @@ -185,8 +182,7 @@ func (b *bitReaderShifted) fillFast() { // fillFastStart() assumes the bitReaderShifted is empty and there is at least 8 bytes to read. func (b *bitReaderShifted) fillFastStart() { - // Do single re-slice to avoid bounds checks. 
- b.value = binary.LittleEndian.Uint64(b.in[b.off-8:]) + b.value = le.Load64(b.in, b.off-8) b.bitsRead = 0 b.off -= 8 } @@ -197,8 +193,7 @@ func (b *bitReaderShifted) fill() { return } if b.off > 4 { - v := b.in[b.off-4 : b.off] - low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + low := le.Load32(b.in, b.off-4) b.value |= uint64(low) << ((b.bitsRead - 32) & 63) b.bitsRead -= 32 b.off -= 4 diff --git a/huff0/decompress_test.go b/huff0/decompress_test.go index fe23514fad..47123058ff 100644 --- a/huff0/decompress_test.go +++ b/huff0/decompress_test.go @@ -91,7 +91,7 @@ func TestDecompress1X(t *testing.T) { t.Log(string(dc)) } //t.Errorf(test.name+": decompressed, got delta: \n%s") - t.Errorf(test.name + ": decompressed, got delta") + t.Error(test.name + ": decompressed, got delta") } if !t.Failed() { t.Log("... roundtrip ok!") @@ -221,7 +221,7 @@ func TestDecompress4X(t *testing.T) { t.Log(string(dc)) } //t.Errorf(test.name+": decompressed, got delta: \n%s") - t.Errorf(test.name + ": decompressed, got delta") + t.Error(test.name + ": decompressed, got delta") } if !t.Failed() { t.Log("... roundtrip ok!") @@ -315,7 +315,7 @@ func TestRoundtrip1XFuzz(t *testing.T) { t.Log(string(dc)) } //t.Errorf(test.name+": decompressed, got delta: \n%s") - t.Errorf(test.name + ": decompressed, got delta") + t.Error(test.name + ": decompressed, got delta") } if !t.Failed() { t.Log("... roundtrip ok!") @@ -406,7 +406,7 @@ func TestRoundtrip4XFuzz(t *testing.T) { t.Log(string(dc)) } //t.Errorf(test.name+": decompressed, got delta: \n%s") - t.Errorf(test.name + ": decompressed, got delta") + t.Error(test.name + ": decompressed, got delta") } if !t.Failed() { t.Log("... 
roundtrip ok!") diff --git a/s2/decode_other.go b/s2/decode_other.go index 2cb55c2c77..c99d40b69d 100644 --- a/s2/decode_other.go +++ b/s2/decode_other.go @@ -11,6 +11,8 @@ package s2 import ( "fmt" "strconv" + + "github.com/klauspost/compress/internal/le" ) // decode writes the decoding of src to dst. It assumes that the varint-encoded @@ -38,21 +40,18 @@ func s2Decode(dst, src []byte) int { case x < 60: s++ case x == 60: + x = uint32(src[s+1]) s += 2 - x = uint32(src[s-1]) case x == 61: - in := src[s : s+3] - x = uint32(in[1]) | uint32(in[2])<<8 + x = uint32(le.Load16(src, s+1)) s += 3 case x == 62: - in := src[s : s+4] // Load as 32 bit and shift down. - x = uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24 + x = le.Load32(src, s) x >>= 8 s += 4 case x == 63: - in := src[s : s+5] - x = uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24 + x = le.Load32(src, s+1) s += 5 } length = int(x) + 1 @@ -85,8 +84,7 @@ func s2Decode(dst, src []byte) int { length = int(src[s]) + 4 s += 1 case 6: - in := src[s : s+2] - length = int(uint32(in[0])|(uint32(in[1])<<8)) + (1 << 8) + length = int(le.Load16(src, s)) + 1<<8 s += 2 case 7: in := src[s : s+3] @@ -99,15 +97,13 @@ func s2Decode(dst, src []byte) int { } length += 4 case tagCopy2: - in := src[s : s+3] - offset = int(uint32(in[1]) | uint32(in[2])<<8) - length = 1 + int(in[0])>>2 + offset = int(le.Load16(src, s+1)) + length = 1 + int(src[s])>>2 s += 3 case tagCopy4: - in := src[s : s+5] - offset = int(uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24) - length = 1 + int(in[0])>>2 + offset = int(le.Load32(src, s+1)) + length = 1 + int(src[s])>>2 s += 5 } diff --git a/zstd/seqenc.go b/zstd/seqenc.go index 8014174a77..65045eabdd 100644 --- a/zstd/seqenc.go +++ b/zstd/seqenc.go @@ -69,7 +69,6 @@ var llBitsTable = [maxLLCode + 1]byte{ func llCode(litLength uint32) uint8 { const llDeltaCode = 19 if litLength <= 63 { - // Compiler insists on bounds check (Go 1.12) 
return llCodeTable[litLength&63] } return uint8(highBit(litLength)) + llDeltaCode @@ -102,7 +101,6 @@ var mlBitsTable = [maxMLCode + 1]byte{ func mlCode(mlBase uint32) uint8 { const mlDeltaCode = 36 if mlBase <= 127 { - // Compiler insists on bounds check (Go 1.12) return mlCodeTable[mlBase&127] } return uint8(highBit(mlBase)) + mlDeltaCode From 2fcbfef02937d61375ab9fbf4ef914dc29f81b70 Mon Sep 17 00:00:00 2001 From: Klaus Post <klauspost@gmail.com> Date: Tue, 14 Jan 2025 14:21:48 +0100 Subject: [PATCH 5/9] Avoid loop bounds check. --- flate/matchlen_generic.go | 21 +++++++++++++-------- internal/le/unsafe_enabled.go | 8 ++++++++ zstd/matchlen_generic.go | 16 +++++++++++----- 3 files changed, 32 insertions(+), 13 deletions(-) diff --git a/flate/matchlen_generic.go b/flate/matchlen_generic.go index ad5cd814b9..a27a9756f3 100644 --- a/flate/matchlen_generic.go +++ b/flate/matchlen_generic.go @@ -7,21 +7,27 @@ package flate import ( - "encoding/binary" "math/bits" + + "github.com/klauspost/compress/internal/le" ) // matchLen returns the maximum common prefix length of a and b. // a must be the shortest of the two. func matchLen(a, b []byte) (n int) { - for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] { - diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b) - if diff != 0 { - return n + bits.TrailingZeros64(diff)>>3 + if len(a) >= 8 && len(b) >= 8 { + left := len(a) - 8 + for left >= 0 { + diff := le.Load64(a, n) ^ le.Load64(b, n) + if diff != 0 { + return n + bits.TrailingZeros64(diff)>>3 + } + n += 8 + left -= 8 } - n += 8 } - + a = a[n:] + b = b[n:] for i := range a { if a[i] != b[i] { break @@ -29,5 +35,4 @@ func matchLen(a, b []byte) (n int) { n++ } return n - } diff --git a/internal/le/unsafe_enabled.go b/internal/le/unsafe_enabled.go index 18f3c4d102..b12e0316f4 100644 --- a/internal/le/unsafe_enabled.go +++ b/internal/le/unsafe_enabled.go @@ -8,24 +8,32 @@ import ( "unsafe" ) +// Load16 will load from b at index i. 
+// If the compiler can prove that b is at least 1 byte this will be without bounds check. func Load16[I Indexer](b []byte, i I) uint16 { //return binary.LittleEndian.Uint16(b[i:]) //return *(*uint16)(unsafe.Pointer(&b[i])) return *(*uint16)(unsafe.Pointer(uintptr(unsafe.Pointer(&b[0])) + uintptr(i)*unsafe.Sizeof(b[0]))) } +// Load32 will load from b at index i. +// If the compiler can prove that b is at least 1 byte this will be without bounds check. func Load32[I Indexer](b []byte, i I) uint32 { //return binary.LittleEndian.Uint32(b[i:]) //return *(*uint32)(unsafe.Pointer(&b[i])) return *(*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(&b[0])) + uintptr(i)*unsafe.Sizeof(b[0]))) } +// Load64 will load from b at index i. +// If the compiler can prove that b is at least 1 byte this will be without bounds check. func Load64[I Indexer](b []byte, i I) uint64 { //return binary.LittleEndian.Uint64(b[i:]) //return *(*uint64)(unsafe.Pointer(&b[i])) return *(*uint64)(unsafe.Pointer(uintptr(unsafe.Pointer(&b[0])) + uintptr(i)*unsafe.Sizeof(b[0]))) } +// Store16 will store v at b. +// If the compiler can prove func Store16(b []byte, v uint16) { //binary.LittleEndian.PutUint16(b, v) *(*uint16)(unsafe.Pointer(&b[0])) = v diff --git a/zstd/matchlen_generic.go b/zstd/matchlen_generic.go index 741e784491..d4627eb4ab 100644 --- a/zstd/matchlen_generic.go +++ b/zstd/matchlen_generic.go @@ -15,13 +15,19 @@ import ( // matchLen returns the maximum common prefix length of a and b. // a must be the shortest of the two. 
func matchLen(a, b []byte) (n int) { - for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] { - diff := le.Load64(a, 0) ^ le.Load64(b, 0) - if diff != 0 { - return n + bits.TrailingZeros64(diff)>>3 + if len(a) >= 8 && len(b) >= 8 { + left := len(a) - 8 + for left >= 0 { + diff := le.Load64(a, n) ^ le.Load64(b, n) + if diff != 0 { + return n + bits.TrailingZeros64(diff)>>3 + } + n += 8 + left -= 8 } - n += 8 } + a = a[n:] + b = b[n:] for i := range a { if a[i] != b[i] { From 75ce7ef4e98d16cf95af267d5e61edd850fc650c Mon Sep 17 00:00:00 2001 From: Klaus Post <klauspost@gmail.com> Date: Tue, 14 Jan 2025 15:01:23 +0100 Subject: [PATCH 6/9] Avoid length check on callers. --- flate/matchlen_generic.go | 17 ++++++++--------- internal/le/unsafe_enabled.go | 10 +++++----- zstd/matchlen_generic.go | 16 +++++++--------- 3 files changed, 20 insertions(+), 23 deletions(-) diff --git a/flate/matchlen_generic.go b/flate/matchlen_generic.go index a27a9756f3..8c840f9b40 100644 --- a/flate/matchlen_generic.go +++ b/flate/matchlen_generic.go @@ -15,17 +15,16 @@ import ( // matchLen returns the maximum common prefix length of a and b. // a must be the shortest of the two. 
func matchLen(a, b []byte) (n int) { - if len(a) >= 8 && len(b) >= 8 { - left := len(a) - 8 - for left >= 0 { - diff := le.Load64(a, n) ^ le.Load64(b, n) - if diff != 0 { - return n + bits.TrailingZeros64(diff)>>3 - } - n += 8 - left -= 8 + left := len(a) + for left >= 8 { + diff := le.Load64(a, n) ^ le.Load64(b, n) + if diff != 0 { + return n + bits.TrailingZeros64(diff)>>3 } + n += 8 + left -= 8 } + a = a[n:] b = b[n:] for i := range a { diff --git a/internal/le/unsafe_enabled.go b/internal/le/unsafe_enabled.go index b12e0316f4..342d4b51c2 100644 --- a/internal/le/unsafe_enabled.go +++ b/internal/le/unsafe_enabled.go @@ -13,7 +13,7 @@ import ( func Load16[I Indexer](b []byte, i I) uint16 { //return binary.LittleEndian.Uint16(b[i:]) //return *(*uint16)(unsafe.Pointer(&b[i])) - return *(*uint16)(unsafe.Pointer(uintptr(unsafe.Pointer(&b[0])) + uintptr(i)*unsafe.Sizeof(b[0]))) + return *(*uint16)(unsafe.Pointer(uintptr(unsafe.Pointer(unsafe.SliceData(b))) + uintptr(i)*unsafe.Sizeof(b[0]))) } // Load32 will load from b at index i. @@ -21,7 +21,7 @@ func Load16[I Indexer](b []byte, i I) uint16 { func Load32[I Indexer](b []byte, i I) uint32 { //return binary.LittleEndian.Uint32(b[i:]) //return *(*uint32)(unsafe.Pointer(&b[i])) - return *(*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(&b[0])) + uintptr(i)*unsafe.Sizeof(b[0]))) + return *(*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(unsafe.SliceData(b))) + uintptr(i)*unsafe.Sizeof(b[0]))) } // Load64 will load from b at index i. @@ -29,17 +29,17 @@ func Load32[I Indexer](b []byte, i I) uint32 { func Load64[I Indexer](b []byte, i I) uint64 { //return binary.LittleEndian.Uint64(b[i:]) //return *(*uint64)(unsafe.Pointer(&b[i])) - return *(*uint64)(unsafe.Pointer(uintptr(unsafe.Pointer(&b[0])) + uintptr(i)*unsafe.Sizeof(b[0]))) + return *(*uint64)(unsafe.Pointer(uintptr(unsafe.Pointer(unsafe.SliceData(b))) + uintptr(i)*unsafe.Sizeof(b[0]))) } // Store16 will store v at b. 
// If the compiler can prove func Store16(b []byte, v uint16) { //binary.LittleEndian.PutUint16(b, v) - *(*uint16)(unsafe.Pointer(&b[0])) = v + *(*uint16)(unsafe.Pointer(unsafe.SliceData(b))) = v } func Store32(b []byte, v uint32) { //binary.LittleEndian.PutUint32(b, v) - *(*uint32)(unsafe.Pointer(&b[0])) = v + *(*uint32)(unsafe.Pointer(unsafe.SliceData(b))) = v } diff --git a/zstd/matchlen_generic.go b/zstd/matchlen_generic.go index d4627eb4ab..bea1779e97 100644 --- a/zstd/matchlen_generic.go +++ b/zstd/matchlen_generic.go @@ -15,16 +15,14 @@ import ( // matchLen returns the maximum common prefix length of a and b. // a must be the shortest of the two. func matchLen(a, b []byte) (n int) { - if len(a) >= 8 && len(b) >= 8 { - left := len(a) - 8 - for left >= 0 { - diff := le.Load64(a, n) ^ le.Load64(b, n) - if diff != 0 { - return n + bits.TrailingZeros64(diff)>>3 - } - n += 8 - left -= 8 + left := len(a) + for left >= 8 { + diff := le.Load64(a, n) ^ le.Load64(b, n) + if diff != 0 { + return n + bits.TrailingZeros64(diff)>>3 } + n += 8 + left -= 8 } a = a[n:] b = b[n:] From 4a120045da0c79656cb612efea4e9be4f3a98294 Mon Sep 17 00:00:00 2001 From: Klaus Post <klauspost@gmail.com> Date: Tue, 14 Jan 2025 15:15:38 +0100 Subject: [PATCH 7/9] Use in more places. --- flate/huffman_bit_writer.go | 19 ++++++++++--------- flate/level1.go | 31 ++++++++++++++++--------------- internal/le/unsafe_disabled.go | 4 ++++ internal/le/unsafe_enabled.go | 17 ++++++++++------- 4 files changed, 40 insertions(+), 31 deletions(-) diff --git a/flate/huffman_bit_writer.go b/flate/huffman_bit_writer.go index f70594c34e..afdc8c053a 100644 --- a/flate/huffman_bit_writer.go +++ b/flate/huffman_bit_writer.go @@ -5,10 +5,11 @@ package flate import ( - "encoding/binary" "fmt" "io" "math" + + "github.com/klauspost/compress/internal/le" ) const ( @@ -438,7 +439,7 @@ func (w *huffmanBitWriter) writeOutBits() { n := w.nbytes // We over-write, but faster... 
- binary.LittleEndian.PutUint64(w.bytes[n:], bits) + le.Store64(w.bytes[n:], bits) n += 6 if n >= bufferFlushSize { @@ -854,7 +855,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) bits |= c.code64() << (nbits & 63) nbits += c.len() if nbits >= 48 { - binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + le.Store64(w.bytes[nbytes:], bits) //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits bits >>= 48 nbits -= 48 @@ -882,7 +883,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) bits |= c.code64() << (nbits & 63) nbits += c.len() if nbits >= 48 { - binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + le.Store64(w.bytes[nbytes:], bits) //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits bits >>= 48 nbits -= 48 @@ -905,7 +906,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) bits |= uint64(extraLength) << (nbits & 63) nbits += extraLengthBits if nbits >= 48 { - binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + le.Store64(w.bytes[nbytes:], bits) //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits bits >>= 48 nbits -= 48 @@ -931,7 +932,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) bits |= c.code64() << (nbits & 63) nbits += c.len() if nbits >= 48 { - binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + le.Store64(w.bytes[nbytes:], bits) //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits bits >>= 48 nbits -= 48 @@ -953,7 +954,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) bits |= uint64((offset-(offsetComb>>8))&matchOffsetOnlyMask) << (nbits & 63) nbits += uint8(offsetComb) if nbits >= 48 { - binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + le.Store64(w.bytes[nbytes:], bits) //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits bits >>= 48 nbits -= 48 @@ -1107,7 +1108,7 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { // We 
must have at least 48 bits free. if nbits >= 8 { n := nbits >> 3 - binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + le.Store64(w.bytes[nbytes:], bits) bits >>= (n * 8) & 63 nbits -= n * 8 nbytes += n @@ -1136,7 +1137,7 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { // Remaining... for _, t := range input { if nbits >= 48 { - binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + le.Store64(w.bytes[nbytes:], bits) //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits bits >>= 48 nbits -= 48 diff --git a/flate/level1.go b/flate/level1.go index 703b9a89aa..61854a3526 100644 --- a/flate/level1.go +++ b/flate/level1.go @@ -1,9 +1,10 @@ package flate import ( - "encoding/binary" "fmt" "math/bits" + + "github.com/klauspost/compress/internal/le" ) // fastGen maintains the table for matches, @@ -126,26 +127,26 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { l = e.matchlenLong(s+4, t+4, src) + 4 } else { // inlined: - a := src[s+4:] - b := src[t+4:] - for len(a) >= 8 { - if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 { + a := src[s:] + b := src[t:] + left := len(a) - 4 + for left >= 8 { + if diff := le.Load64(a, l) ^ le.Load64(b, l); diff != 0 { l += int32(bits.TrailingZeros64(diff) >> 3) - break + goto endMatch } l += 8 - a = a[8:] - b = b[8:] + left -= 8 } - if len(a) < 8 { - b = b[:len(a)] - for i := range a { - if a[i] != b[i] { - break - } - l++ + a = a[l:] + b = b[l:] + for i := range a { + if a[i] != b[i] { + break } + l++ } + endMatch: } // Extend backwards diff --git a/internal/le/unsafe_disabled.go b/internal/le/unsafe_disabled.go index f9d81b17c1..9643495f01 100644 --- a/internal/le/unsafe_disabled.go +++ b/internal/le/unsafe_disabled.go @@ -25,3 +25,7 @@ func Store16(b []byte, v uint16) { func Store32(b []byte, v uint32) { binary.LittleEndian.PutUint32(b, v) } + +func Store64(b []byte, v uint64) { + binary.LittleEndian.PutUint64(b, v) +} diff --git 
a/internal/le/unsafe_enabled.go b/internal/le/unsafe_enabled.go index 342d4b51c2..5a5d2dabf0 100644 --- a/internal/le/unsafe_enabled.go +++ b/internal/le/unsafe_enabled.go @@ -9,37 +9,40 @@ import ( ) // Load16 will load from b at index i. -// If the compiler can prove that b is at least 1 byte this will be without bounds check. func Load16[I Indexer](b []byte, i I) uint16 { //return binary.LittleEndian.Uint16(b[i:]) //return *(*uint16)(unsafe.Pointer(&b[i])) - return *(*uint16)(unsafe.Pointer(uintptr(unsafe.Pointer(unsafe.SliceData(b))) + uintptr(i)*unsafe.Sizeof(b[0]))) + return *(*uint16)(unsafe.Add(unsafe.Pointer(unsafe.SliceData(b)), i)) } // Load32 will load from b at index i. -// If the compiler can prove that b is at least 1 byte this will be without bounds check. func Load32[I Indexer](b []byte, i I) uint32 { //return binary.LittleEndian.Uint32(b[i:]) //return *(*uint32)(unsafe.Pointer(&b[i])) - return *(*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(unsafe.SliceData(b))) + uintptr(i)*unsafe.Sizeof(b[0]))) + return *(*uint32)(unsafe.Add(unsafe.Pointer(unsafe.SliceData(b)), i)) } // Load64 will load from b at index i. -// If the compiler can prove that b is at least 1 byte this will be without bounds check. func Load64[I Indexer](b []byte, i I) uint64 { //return binary.LittleEndian.Uint64(b[i:]) //return *(*uint64)(unsafe.Pointer(&b[i])) - return *(*uint64)(unsafe.Pointer(uintptr(unsafe.Pointer(unsafe.SliceData(b))) + uintptr(i)*unsafe.Sizeof(b[0]))) + return *(*uint64)(unsafe.Add(unsafe.Pointer(unsafe.SliceData(b)), i)) } // Store16 will store v at b. -// If the compiler can prove func Store16(b []byte, v uint16) { //binary.LittleEndian.PutUint16(b, v) *(*uint16)(unsafe.Pointer(unsafe.SliceData(b))) = v } +// Store32 will store v at b. func Store32(b []byte, v uint32) { //binary.LittleEndian.PutUint32(b, v) *(*uint32)(unsafe.Pointer(unsafe.SliceData(b))) = v } + +// Store64 will store v at b. 
+func Store64(b []byte, v uint64) { + //binary.LittleEndian.PutUint64(b, v) + *(*uint64)(unsafe.Pointer(unsafe.SliceData(b))) = v +} From b959ae35c8d655e318db0c10d9ca040975c11dce Mon Sep 17 00:00:00 2001 From: Klaus Post <klauspost@gmail.com> Date: Tue, 14 Jan 2025 15:57:08 +0100 Subject: [PATCH 8/9] Bump s2sx version --- s2sx.mod | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/s2sx.mod b/s2sx.mod index 5a4412f907..15b74a57c7 100644 --- a/s2sx.mod +++ b/s2sx.mod @@ -1,4 +1,3 @@ module github.com/klauspost/compress -go 1.19 - +go 1.21 From 209574e8fe048b3a5dcb3ac3af29c4ada3b92426 Mon Sep 17 00:00:00 2001 From: Klaus Post <klauspost@gmail.com> Date: Tue, 14 Jan 2025 16:40:16 +0100 Subject: [PATCH 9/9] Update docs --- README.md | 11 +++++++++++ flate/stateless.go | 13 ++++--------- zstd/README.md | 2 +- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index de264c85a5..80ede339f5 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,17 @@ This package provides various compression algorithms. [](https://github.com/klauspost/compress/actions/workflows/go.yml) [](https://sourcegraph.com/github.com/klauspost/compress?badge) +# package usage + +Use `go get github.com/klauspost/compress@latest` to add it to your project. + +This package will support the current Go version and 2 versions back. + +* Use the `nounsafe` tag to disable all use of the "unsafe" package. +* Use the `noasm` tag to disable all assembly across packages. + +Use the links above for more information on each. 
+ # changelog * Sep 23rd, 2024 - [1.17.10](https://github.com/klauspost/compress/releases/tag/v1.17.10) diff --git a/flate/stateless.go b/flate/stateless.go index f3d4139ef3..13b9b100db 100644 --- a/flate/stateless.go +++ b/flate/stateless.go @@ -4,6 +4,8 @@ import ( "io" "math" "sync" + + "github.com/klauspost/compress/internal/le" ) const ( @@ -152,18 +154,11 @@ func hashSL(u uint32) uint32 { } func load3216(b []byte, i int16) uint32 { - // Help the compiler eliminate bounds checks on the read so it can be done in a single read. - b = b[i:] - b = b[:4] - return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 + return le.Load32(b, i) } func load6416(b []byte, i int16) uint64 { - // Help the compiler eliminate bounds checks on the read so it can be done in a single read. - b = b[i:] - b = b[:8] - return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | - uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 + return le.Load64(b, i) } func statelessEnc(dst *tokens, src []byte, startAt int16) { diff --git a/zstd/README.md b/zstd/README.md index 92e2347bbc..c11d7fa28e 100644 --- a/zstd/README.md +++ b/zstd/README.md @@ -6,7 +6,7 @@ A high performance compression algorithm is implemented. For now focused on spee This package provides [compression](#Compressor) to and [decompression](#Decompressor) of Zstandard content. -This package is pure Go and without use of "unsafe". +This package is pure Go. Use `noasm` and `nounsafe` to disable relevant features. The `zstd` package is provided as open source software using a Go standard license.