Skip to content

Commit 1875735

Browse files
committed
refactor: move compress_fastest to a new file
1 parent 6ee3957 commit 1875735

File tree

4 files changed

+82
-47
lines changed

4 files changed

+82
-47
lines changed

src/encoding/frame_compressor.rs

Lines changed: 10 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,11 @@ use twox_hash::XxHash64;
99
use core::hash::Hasher;
1010

1111
use super::{
12-
block_header::BlockHeader, blocks::compress_block, frame_header::FrameHeader,
12+
block_header::BlockHeader, frame_header::FrameHeader, levels::*,
1313
match_generator::MatchGeneratorDriver, CompressionLevel, Matcher,
1414
};
1515

16-
use crate::{
17-
common::MAX_BLOCK_SIZE,
18-
io::{Read, Write},
19-
};
16+
use crate::io::{Read, Write};
2017

2118
/// An interface for compressing arbitrary data with the ZStandard compression algorithm.
2219
///
@@ -106,24 +103,25 @@ impl<R: Read, W: Write, M: Matcher> FrameCompressor<R, W, M> {
106103
/// To avoid endlessly encoding from a potentially endless source (like a network socket) you can use the
107104
/// [Read::take] function
108105
pub fn compress(&mut self) {
106+
// Clearing buffers to allow re-using of the compressor
109107
self.state.matcher.reset(self.compression_level);
110108
self.state.last_huff_table = None;
111109
let source = self.uncompressed_data.as_mut().unwrap();
112110
let drain = self.compressed_data.as_mut().unwrap();
113-
114-
let mut output = Vec::with_capacity(1024 * 130);
115-
let output = &mut output;
111+
// As the frame is compressed, it's stored here
112+
let output: &mut Vec<u8> = &mut Vec::with_capacity(1024 * 130);
113+
// First write the frame header
116114
let header = FrameHeader {
117115
frame_content_size: None,
118116
single_segment: false,
119117
content_checksum: cfg!(feature = "hash"),
120118
dictionary_id: None,
121119
window_size: Some(self.state.matcher.window_size()),
122120
};
123-
124121
header.serialize(output);
125-
122+
// Now compress block by block
126123
loop {
124+
// Read a single block's worth of uncompressed data from the input
127125
let mut uncompressed_data = self.state.matcher.get_next_space();
128126
let mut read_bytes = 0;
129127
let last_block;
@@ -140,6 +138,7 @@ impl<R: Read, W: Write, M: Matcher> FrameCompressor<R, W, M> {
140138
}
141139
}
142140
uncompressed_data.resize(read_bytes, 0);
141+
// As we read, hash that data too
143142
#[cfg(feature = "hash")]
144143
self.hasher.write(&uncompressed_data);
145144
// Special handling is needed for compression of a totally empty file (why you'd want to do that, I don't know)
@@ -168,42 +167,7 @@ impl<R: Read, W: Write, M: Matcher> FrameCompressor<R, W, M> {
168167
output.extend_from_slice(&uncompressed_data);
169168
}
170169
CompressionLevel::Fastest => {
171-
if uncompressed_data.iter().all(|x| uncompressed_data[0].eq(x)) {
172-
let rle_byte = uncompressed_data[0];
173-
self.state.matcher.commit_space(uncompressed_data);
174-
self.state.matcher.skip_matching();
175-
let header = BlockHeader {
176-
last_block,
177-
block_type: crate::blocks::block::BlockType::RLE,
178-
block_size: read_bytes.try_into().unwrap(),
179-
};
180-
// Write the header, then the block
181-
header.serialize(output);
182-
output.push(rle_byte);
183-
} else {
184-
let mut compressed = Vec::new();
185-
self.state.matcher.commit_space(uncompressed_data);
186-
compress_block(&mut self.state, &mut compressed);
187-
if compressed.len() >= MAX_BLOCK_SIZE as usize {
188-
let header = BlockHeader {
189-
last_block,
190-
block_type: crate::blocks::block::BlockType::Raw,
191-
block_size: read_bytes.try_into().unwrap(),
192-
};
193-
// Write the header, then the block
194-
header.serialize(output);
195-
output.extend_from_slice(self.state.matcher.get_last_space());
196-
} else {
197-
let header = BlockHeader {
198-
last_block,
199-
block_type: crate::blocks::block::BlockType::Compressed,
200-
block_size: (compressed.len()).try_into().unwrap(),
201-
};
202-
// Write the header, then the block
203-
header.serialize(output);
204-
output.extend(compressed);
205-
}
206-
}
170+
compress_fastest(&mut self.state, last_block, uncompressed_data, output)
207171
}
208172
_ => {
209173
unimplemented!();

src/encoding/levels/fastest.rs

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
use crate::{
2+
common::MAX_BLOCK_SIZE,
3+
encoding::{
4+
block_header::BlockHeader, blocks::compress_block, frame_compressor::CompressState, Matcher,
5+
},
6+
};
7+
use alloc::vec::Vec;
8+
9+
/// Compresses a single block at [`crate::encoding::CompressionLevel::Fastest`].
10+
///
11+
/// # Parameters
12+
/// - `state`: [`CompressState`] so the compressor can refer to data before
13+
/// the start of this block
14+
/// - `last_block`: Whether or not this block is going to be the last block in the frame
15+
/// (needed because this info is written into the block header)
16+
/// - `uncompressed_data`: A block's worth of uncompressed data, taken from the
17+
/// larger input
18+
/// - `output`: As `uncompressed_data` is compressed, it's appended to `output`.
19+
#[inline]
20+
pub fn compress_fastest<M: Matcher>(
21+
state: &mut CompressState<M>,
22+
last_block: bool,
23+
uncompressed_data: Vec<u8>,
24+
output: &mut Vec<u8>,
25+
) {
26+
let block_size = uncompressed_data.len() as u32;
27+
// First check to see if run length encoding can be used for the entire block
28+
if uncompressed_data.iter().all(|x| uncompressed_data[0].eq(x)) {
29+
let rle_byte = uncompressed_data[0];
30+
state.matcher.commit_space(uncompressed_data);
31+
state.matcher.skip_matching();
32+
let header = BlockHeader {
33+
last_block,
34+
block_type: crate::blocks::block::BlockType::RLE,
35+
block_size,
36+
};
37+
// Write the header, then the block
38+
header.serialize(output);
39+
output.push(rle_byte);
40+
} else {
41+
// Compress as a standard compressed block
42+
let mut compressed = Vec::new();
43+
state.matcher.commit_space(uncompressed_data);
44+
compress_block(state, &mut compressed);
45+
// If the compressed data is larger than the maximum
46+
// allowable block size, instead store uncompressed
47+
if compressed.len() >= MAX_BLOCK_SIZE as usize {
48+
let header = BlockHeader {
49+
last_block,
50+
block_type: crate::blocks::block::BlockType::Raw,
51+
block_size,
52+
};
53+
// Write the header, then the block
54+
header.serialize(output);
55+
output.extend_from_slice(state.matcher.get_last_space());
56+
} else {
57+
let header = BlockHeader {
58+
last_block,
59+
block_type: crate::blocks::block::BlockType::Compressed,
60+
block_size: compressed.len() as u32,
61+
};
62+
// Write the header, then the block
63+
header.serialize(output);
64+
output.extend(compressed);
65+
}
66+
}
67+
}

src/encoding/levels/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
// Block-compression strategies specialized per `CompressionLevel`;
// currently only the `Fastest` level has a dedicated implementation.
mod fastest;
pub use fastest::compress_fastest;

src/encoding/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ pub(crate) mod match_generator;
77
pub(crate) mod util;
88

99
mod frame_compressor;
10+
mod levels;
1011
pub use frame_compressor::FrameCompressor;
1112

1213
use crate::io::{Read, Write};
@@ -68,7 +69,8 @@ pub enum CompressionLevel {
6869
/// making their own tradeoffs between runtime, memory usage and compression ratio
6970
///
7071
/// This trait operates on buffers that represent the chunks of data the matching algorithm wants to work on.
71-
/// One or more of these buffers represent the window the decoder will need to decode the data again.
72+
/// Each one of these buffers is referred to as a *space*. One or more of these buffers represent the window
73+
/// the decoder will need to decode the data again.
7274
///
7375
/// This library asks the Matcher for a new buffer using `get_next_space` to allow reusing of allocated buffers when they are no longer part of the
7476
/// window of data that is being used for matching.

0 commit comments

Comments
 (0)