Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup StringBuilder methods #211

Merged
merged 36 commits into from
Jan 2, 2025
Merged
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
4d555fb
implement `StringBuilder::reserve`, `StringBuilder::reserve_exact` `S…
airblast-dev Dec 30, 2024
747e377
cleanup `StringBuilder`
airblast-dev Dec 30, 2024
259a22a
use `StringBuilder::reserve_exact` in `String::from_bytes`
airblast-dev Dec 30, 2024
5aebd6d
reuse `StringBuilder::reserve_exact` in `StringBuilder::reserve`
airblast-dev Dec 30, 2024
62ea409
add comment for `StringBuilder::push_bytes`
airblast-dev Dec 30, 2024
33561c0
Merge branch 'main' into string_cleanup
airblast-dev Dec 30, 2024
beca9ba
avoid allocating for 0 capacity in `StringBuilder::reserve*`
airblast-dev Dec 30, 2024
23f858d
dont allocate for an extra byte in `StringBuilder::push_bytes`
airblast-dev Dec 30, 2024
9350887
fix allocation sizes for `StringBuilder`
airblast-dev Dec 31, 2024
8b6f8c8
add tests
airblast-dev Dec 31, 2024
90744eb
remove _ casts to avoid type changes with refactors
airblast-dev Dec 31, 2024
c3b7ebb
shrink allocation to decrease leak size
airblast-dev Dec 31, 2024
b8fac4c
cleanup tests
airblast-dev Dec 31, 2024
7488494
optimize `String::from_bytes`
airblast-dev Dec 31, 2024
38ad425
add extra checks for debug mode
airblast-dev Dec 31, 2024
3df32f5
add doc comment
airblast-dev Dec 31, 2024
2c507a6
remove redundant comment
airblast-dev Dec 31, 2024
ebc1b81
use malloc in `String::with_capacity`
airblast-dev Dec 31, 2024
7addd67
add test for `StringBuilder::with_capacity`
airblast-dev Dec 31, 2024
33a6b5a
drop empty strings with reserved capacity
airblast-dev Jan 1, 2025
5d61731
fix off by one in alloc check
airblast-dev Jan 1, 2025
47b19a2
use with_capacity instead of reserve_exact
airblast-dev Jan 1, 2025
291934a
remove finish_unchecked
airblast-dev Jan 1, 2025
4f84eac
remove duplicate code in reserve*
airblast-dev Jan 1, 2025
15a3b96
reword remaining capacity docs
airblast-dev Jan 1, 2025
6f54bc5
remove extra check
airblast-dev Jan 1, 2025
81d561c
fix typo
airblast-dev Jan 1, 2025
2972fa7
remove redundant comment
airblast-dev Jan 1, 2025
df371ab
remove capacity check
airblast-dev Jan 2, 2025
6f6a6cd
fmt
airblast-dev Jan 2, 2025
c816c7d
add null check in finish
airblast-dev Jan 2, 2025
2304160
make finish public
airblast-dev Jan 2, 2025
8d464dc
fmt
airblast-dev Jan 2, 2025
a101287
Format comments
noib3 Jan 2, 2025
f7b29d3
Re-add commented out capacity check
noib3 Jan 2, 2025
cb67a1c
Rename `reserve_raw` to `realloc`
noib3 Jan 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
277 changes: 225 additions & 52 deletions crates/types/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use alloc::borrow::Cow;
use alloc::string::String as StdString;
use core::str::{self, Utf8Error};
use core::{ffi, fmt, ptr, slice};
use std::num::NonZeroUsize;
use std::path::{Path, PathBuf};

use luajit as lua;
Expand Down Expand Up @@ -88,7 +89,7 @@ impl String {
/// by a null byte.
#[inline]
pub fn from_bytes(bytes: &[u8]) -> Self {
let mut s = StringBuilder::new();
let mut s = StringBuilder::with_capacity(bytes.len());
s.push_bytes(bytes);
s.finish()
}
Expand All @@ -99,7 +100,8 @@ impl String {
self.len() == 0
}

/// Returns the length of the `String`, *not* including the final null byte.
/// Returns the length of the `String`, *not* including the final null
/// byte.
#[inline]
pub fn len(&self) -> usize {
self.len
Expand Down Expand Up @@ -141,52 +143,25 @@ impl StringBuilder {
}

/// Push new bytes to the builder.
///
/// When only pushing bytes once, prefer [`String::from_bytes`] as this
/// method may allocate extra space in the buffer.
#[inline]
pub fn push_bytes(&mut self, bytes: &[u8]) {
if self.inner.data.is_null() {
let len = bytes.len();
let cap = len + 1;

let data = unsafe {
let data = libc::malloc(cap) as *mut ffi::c_char;

libc::memcpy(data as *mut _, bytes.as_ptr() as *const _, len);

*data.add(len) = 0;

data
};

self.inner.data = data;
self.inner.len = len;
self.cap = cap;

let slice_len = bytes.len();
if slice_len == 0 {
return;
}

let slice_len = bytes.len();
let required_cap = self.inner.len + slice_len + 1;

// Reallocate if pushing the bytes overflows the allocated memory.
if self.cap < required_cap {
// The smallest number `n`, such that `required_cap <= * 2^n`.
let n = (required_cap - 1).ilog2() + 1;
let new_cap = 2_usize.pow(n).max(4);

self.inner.data = unsafe {
libc::realloc(self.inner.data as *mut _, new_cap)
as *mut ffi::c_char
};

self.cap = new_cap;
debug_assert!(self.inner.len < self.cap)
}
self.reserve(bytes.len());
debug_assert!(self.inner.len < self.cap);

// Pushing the `bytes` is safe now.
let new_len = unsafe {
libc::memcpy(
self.inner.data.add(self.inner.len) as *mut _,
bytes.as_ptr() as *const _,
self.inner.data.add(self.inner.len) as *mut ffi::c_void,
bytes.as_ptr() as *const ffi::c_void,
slice_len,
);

Expand All @@ -201,16 +176,142 @@ impl StringBuilder {
debug_assert!(self.inner.len < self.cap);
}

/// Build the `String`.
/// Initialize [`StringBuilder`] with capacity.
pub fn with_capacity(cap: usize) -> Self {
// Neovim uses `xstrdup` to clone strings, which doesn't support null
// pointers.
//
// For more infos, see https://github.com/noib3/nvim-oxi/pull/211#issuecomment-2566960494
//
// if cap == 0 {
// return Self::new();
// }
let real_cap = cap + 1;
let ptr = unsafe { libc::malloc(real_cap) };
if ptr.is_null() {
unable_to_alloc_memory();
}
Self {
inner: String { len: 0, data: ptr as *mut ffi::c_char },
cap: real_cap,
}
}

/// Reserve space for `additional` more bytes.
///
/// Does not allocate if enough space is available.
pub fn reserve(&mut self, additional: usize) {
let Some(min_capacity) = self.min_capacity_for_additional(additional)
else {
return;
};
let new_capacity =
min_capacity.checked_next_power_of_two().unwrap_or(min_capacity);
self.realloc(new_capacity);
}

/// Reserve space for exactly `additional` more bytes.
///
/// Does not allocate if enough space is available.
pub fn reserve_exact(&mut self, additional: usize) {
if let Some(new_capacity) =
self.min_capacity_for_additional(additional)
{
self.realloc(new_capacity);
}
}

/// Reallocate the string with the given capacity.
fn realloc(&mut self, new_capacity: NonZeroUsize) {
let ptr = unsafe {
libc::realloc(
self.inner.data as *mut ffi::c_void,
new_capacity.get(),
)
};
// `realloc` may return null if it is unable to allocate the requested
// memory.
if ptr.is_null() {
unable_to_alloc_memory();
}
self.inner.data = ptr as *mut ffi::c_char;
noib3 marked this conversation as resolved.
Show resolved Hide resolved
self.cap = new_capacity.get();
}

/// Finish building the [`String`]
#[inline]
pub fn finish(self) -> String {
let s = String { data: self.inner.data, len: self.inner.len };
let mut s = String { data: self.inner.data, len: self.inner.len() };

if s.data.is_null() {
debug_assert!(s.is_empty());
debug_assert_eq!(self.cap, 0);

// The pointer of `String` should never be null, and it must be
// terminated by a null byte.
if s.data.is_null() {
unsafe {
let ptr = libc::malloc(1) as *mut i8;
if ptr.is_null() {
unable_to_alloc_memory();
}
ptr.write(0);

s.data = ptr;
}
}
} else {
debug_assert!(self.cap > self.inner.len());
}

// Prevent self's destructor from being called.
std::mem::forget(self);

s
}

/// Returns the remaining *usable* capacity, i.e. the remaining capacity
/// minus the space reserved for the null terminator.
#[inline(always)]
fn remaining_capacity(&self) -> usize {
if self.inner.data.is_null() {
debug_assert_eq!(self.inner.len(), 0);
return 0;
}
debug_assert!(
self.cap > 0,
"when data ptr is not null capacity must always be larger than 0"
);
debug_assert!(
self.cap > self.inner.len(),
"allocated capacity must always be bigger than length"
);
self.cap - self.inner.len() - 1
}

/// Returns the minimum capacity needed to allocate `additional` bytes, or
/// `None` if the current capacity is already large enough.
#[inline]
fn min_capacity_for_additional(
&self,
additional: usize,
) -> Option<NonZeroUsize> {
let remaining = self.remaining_capacity();
if remaining >= additional {
return None;
}
if self.inner.data.is_null() {
debug_assert_eq!(self.cap, 0);
NonZeroUsize::new(additional + 1)
} else {
NonZeroUsize::new(self.cap + additional - remaining)
}
}
}

#[cold]
#[inline(never)]
fn unable_to_alloc_memory() {
panic!("unable to alloc memory with libc::realloc")
}

impl Clone for String {
Expand All @@ -233,7 +334,7 @@ impl Drop for String {
impl Drop for StringBuilder {
fn drop(&mut self) {
if !self.inner.data.is_null() {
unsafe { libc::free(self.inner.data as *mut _) }
unsafe { libc::free(self.inner.data as *mut ffi::c_void) }
}
}
}
Expand Down Expand Up @@ -428,7 +529,79 @@ mod tests {
#[test]
fn as_bytes() {
let s = String::from("hello");
assert_eq!(s.as_bytes(), &[b'h', b'e', b'l', b'l', b'o'][..]);
assert_eq!(s.as_bytes(), b"hello");
}

#[test]
fn empty_from_bytes() {
let s = String::from_bytes(b"");
assert_eq!(s.len(), 0);
assert!(!s.data.is_null());
}

#[test]
fn from_bytes() {
let s = String::from_bytes(b"Hello World!");
assert_eq!(s.len(), 12);
}

#[test]
fn with_capacity() {
let s = StringBuilder::with_capacity(0);
assert!(!s.inner.data.is_null());
assert_eq!(s.cap, 1);
assert_eq!(s.inner.len(), 0);
s.finish();
let s = StringBuilder::with_capacity(1);
assert_eq!(s.cap, 2);
assert_eq!(s.inner.len(), 0);
s.finish();
let s = StringBuilder::with_capacity(5);
assert_eq!(s.cap, 6);
assert_eq!(s.inner.len(), 0);
s.finish();
}

#[test]
fn reserve() {
let mut sb = StringBuilder::new();
assert_eq!(sb.cap, 0);
sb.reserve(10);
assert_eq!(sb.cap, 16);

// Shouldn't change the pointer address as we have enough space.
sb.reserve(10);
assert_eq!(sb.cap, 16);
let ptr = sb.inner.data;
sb.push_bytes(b"Hello World!");
// We already allocated the space needed the push above shouldn't
// change the pointer.
assert_eq!(sb.inner.data, ptr);
sb.push_bytes(&[b'a'; 55]);
// We shouldn't check the pointer again as the block might be extended
// instead of being moved to a different address.
assert_eq!(unsafe { *sb.inner.data.add(sb.inner.len) }, 0);
assert_eq!(sb.cap, 128);
}

#[test]
fn reserve_exact() {
let mut sb = StringBuilder::new();
sb.reserve_exact(10);
assert_eq!(sb.cap, 11);
let ptr = sb.inner.data;
sb.push_bytes(b"hi");
assert_eq!(sb.inner.len(), 2);

// The space is already allocated, pushing bytes shouldn't change the
// ptr address.
assert_eq!(ptr, sb.inner.data);
sb.push_bytes(b"Hello World!");
assert_eq!(sb.cap, 16);
assert_eq!(sb.inner.len(), 14);
let ptr = sb.inner.data;
sb.push_bytes(b"c");
assert_eq!(sb.inner.data, ptr);
}

#[test]
Expand Down Expand Up @@ -466,18 +639,18 @@ mod tests {

#[test]
fn builder() {
let str = "foo bar";
let s = "foo bar";
let bytes = b"baz foo bar";

let mut s = StringBuilder::new();
s.push_bytes(str.as_bytes());
s.push_bytes(bytes);
let mut sb = StringBuilder::new();
sb.push_bytes(s.as_bytes());
sb.push_bytes(bytes);

assert_eq!(s.inner.len, str.len() + bytes.len());
assert_eq!(s.cap, 32); // Allocation size
assert_eq!(unsafe { *s.inner.data.add(s.inner.len) }, 0); // Null termination
assert_eq!(sb.inner.len, s.len() + bytes.len());
assert_eq!(sb.cap, 32); // Allocation size
assert_eq!(unsafe { *sb.inner.data.add(sb.inner.len) }, 0); // Null termination

let s = s.finish();
assert_eq!(s.len(), str.len() + bytes.len());
let nv_str = sb.finish();
assert_eq!(nv_str.len(), s.len() + bytes.len());
}
}
Loading