Skip to content

Commit

Permalink
Speed up filter_bytes (#6699)
Browse files Browse the repository at this point in the history
* Use vec

* Use extend, fix capacity
  • Loading branch information
Dandandan authored Nov 7, 2024
1 parent e907bf8 commit e9bf8aa
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 11 deletions.
2 changes: 1 addition & 1 deletion arrow-data/src/transform/variable_size.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ fn extend_offset_values<T: ArrowNativeType + AsPrimitive<usize>>(
len: usize,
) {
let start_values = offsets[start].as_();
let end_values = offsets[start + len].as_();
let end_values: usize = offsets[start + len].as_();
let new_values = &values[start_values..end_values];
buffer.extend_from_slice(new_values);
}
Expand Down
21 changes: 11 additions & 10 deletions arrow-select/src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -582,7 +582,6 @@ fn filter_native<T: ArrowNativeType>(values: &[T], predicate: &FilterPredicate)
}
IterationStrategy::Indices(indices) => {
let iter = indices.iter().map(|x| values[*x]);

// SAFETY: `Vec::iter` is trusted length
unsafe { MutableBuffer::from_trusted_len_iter(iter) }
}
Expand Down Expand Up @@ -618,8 +617,8 @@ where
struct FilterBytes<'a, OffsetSize> {
src_offsets: &'a [OffsetSize],
src_values: &'a [u8],
dst_offsets: MutableBuffer,
dst_values: MutableBuffer,
dst_offsets: Vec<OffsetSize>,
dst_values: Vec<u8>,
cur_offset: OffsetSize,
}

Expand All @@ -631,10 +630,10 @@ where
where
T: ByteArrayType<Offset = OffsetSize>,
{
let num_offsets_bytes = (capacity + 1) * std::mem::size_of::<OffsetSize>();
let mut dst_offsets = MutableBuffer::new(num_offsets_bytes);
let dst_values = MutableBuffer::new(0);
let dst_values = Vec::new();
let mut dst_offsets: Vec<OffsetSize> = Vec::with_capacity(capacity + 1);
let cur_offset = OffsetSize::from_usize(0).unwrap();

dst_offsets.push(cur_offset);

Self {
Expand Down Expand Up @@ -664,13 +663,15 @@ where

/// Extends the in-progress array by the indexes in the provided iterator
fn extend_idx(&mut self, iter: impl Iterator<Item = usize>) {
for idx in iter {
let (start, end, len) = self.get_value_range(idx);
self.dst_offsets.extend(iter.map(|idx| {
let start = self.src_offsets[idx].as_usize();
let end = self.src_offsets[idx + 1].as_usize();
let len = OffsetSize::from_usize(end - start).expect("illegal offset range");
self.cur_offset += len;
self.dst_offsets.push(self.cur_offset);
self.dst_values
.extend_from_slice(&self.src_values[start..end]);
}
self.cur_offset
}));
}

/// Extends the in-progress array by the ranges in the provided iterator
Expand Down

0 comments on commit e9bf8aa

Please sign in to comment.