Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 91 additions & 0 deletions Doc/library/stdtypes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3158,6 +3158,97 @@ objects.

.. versionadded:: 3.14

.. method:: take_bytes(n=None, /)

Take the first *n* bytes as an immutable :class:`bytes`. Defaults to all
bytes.

If *n* is negative, indexes from the end and takes the first :func:`len`
plus *n* bytes. If *n* is out of bounds, raises :exc:`IndexError`.

Taking less than the full length leaves the remaining bytes in the
:class:`bytearray`, which requires a copy. If the remaining bytes should be
discarded, use :meth:`~bytearray.resize` or :keyword:`del` to truncate,
then call :meth:`~bytearray.take_bytes` without a size.

.. impl-detail::

When taking all bytes, CPython implements this as a zero-copy operation,
making it a very efficient way to make a :class:`bytes` from a
:class:`bytearray`.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only when n is None, no?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea, I've been struggling to find a concise wording "minimal copying" leaves a lot more wiggle room to me; can just add the caveat "when taking all bytes" (ba.take_bytes(len(ba)) also doesn't copy)


.. list-table:: Suggested Replacements
:header-rows: 1

* - Description
- Old
- New

* - Return :class:`bytes` after working with :class:`bytearray`
- .. code:: python


def read() -> bytes:
buffer = bytearray(1024)
...
return bytes(buffer)
- .. code:: python

def read() -> bytes:
buffer = bytearray(1024)
...
return buffer.take_bytes()

* - Empty a buffer getting the bytes
- .. code:: python

buffer = bytearray(1024)
...
data = bytes(buffer)
buffer.clear()
- .. code:: python

buffer = bytearray(1024)
...
data = buffer.take_bytes()
assert len(buffer) == 0

* - Split a buffer at a specific separator
- .. code:: python

buffer = bytearray(b'abc\ndef')
n = buffer.find(b'\n')
data = bytes(buffer[:n + 1])
del buffer[:n + 1]
assert buffer == bytearray(b'def')

- .. code:: python

buffer = bytearray(b'abc\ndef')
n = buffer.find(b'\n')
data = buffer.take_bytes(n + 1)
assert buffer == bytearray(b'def')

* - Split a buffer at a specific separator; discard after the separator
- .. code:: python

buffer = bytearray(b'abc\ndef')
n = buffer.find(b'\n')
data = bytes(buffer[:n])
buffer.clear()
assert data == b'abc'
assert len(buffer) == 0

- .. code:: python

buffer = bytearray(b'abc\ndef')
n = buffer.find(b'\n')
buffer.resize(n)
data = buffer.take_bytes()
assert data == b'abc'
assert len(buffer) == 0

.. versionadded:: next

Since bytearray objects are sequences of integers (akin to a list), for a
bytearray object *b*, ``b[0]`` will be an integer, while ``b[0:1]`` will be
a bytearray object of length 1. (This contrasts with text strings, where
Expand Down
52 changes: 52 additions & 0 deletions Lib/test/test_bytes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1451,6 +1451,58 @@ def test_resize(self):
self.assertRaises(MemoryError, bytearray().resize, sys.maxsize)
self.assertRaises(MemoryError, bytearray(1000).resize, sys.maxsize)

def test_take_bytes(self):
ba = bytearray(b'ab')
self.assertEqual(ba.take_bytes(), b'ab')
self.assertEqual(len(ba), 0)
self.assertEqual(ba, bytearray(b''))

# Positive and negative slicing.
ba = bytearray(b'abcdef')
self.assertEqual(ba.take_bytes(1), b'a')
self.assertEqual(ba, bytearray(b'bcdef'))
self.assertEqual(len(ba), 5)
self.assertEqual(ba.take_bytes(-5), b'')
self.assertEqual(ba, bytearray(b'bcdef'))
self.assertEqual(len(ba), 5)
self.assertEqual(ba.take_bytes(-3), b'bc')
self.assertEqual(ba, bytearray(b'def'))
self.assertEqual(len(ba), 3)
self.assertEqual(ba.take_bytes(3), b'def')
self.assertEqual(ba, bytearray(b''))
self.assertEqual(len(ba), 0)

# Take nothing from emptiness.
self.assertEqual(ba.take_bytes(0), b'')
self.assertEqual(ba.take_bytes(), b'')
self.assertEqual(ba.take_bytes(None), b'')

# Out of bounds, bad take value.
self.assertRaises(IndexError, ba.take_bytes, -1)
self.assertRaises(TypeError, ba.take_bytes, 3.14)
ba = bytearray(b'abcdef')
self.assertRaises(IndexError, ba.take_bytes, 7)

# Offset between physical and logical start (ob_bytes != ob_start).
ba = bytearray(b'abcde')
del ba[:2]
self.assertEqual(ba, bytearray(b'cde'))
self.assertEqual(ba.take_bytes(), b'cde')

# Overallocation at end.
ba = bytearray(b'abcde')
del ba[-2:]
self.assertEqual(ba, bytearray(b'abc'))
self.assertEqual(ba.take_bytes(), b'abc')
ba = bytearray(b'abcde')
ba.resize(4)
self.assertEqual(ba.take_bytes(), b'abcd')

# Take of a bytearray with references should fail.
ba = bytearray(b'abc')
with memoryview(ba) as mv:
self.assertRaises(BufferError, ba.take_bytes)
self.assertEqual(ba.take_bytes(), b'abc')
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need to update test_free_threading_bytearray to include resize (probably separate PR) and take_bytes


def test_setitem(self):
def setitem_as_mapping(b, i, val):
Expand Down
90 changes: 90 additions & 0 deletions Objects/bytearrayobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1495,6 +1495,95 @@ bytearray_resize_impl(PyByteArrayObject *self, Py_ssize_t size)
}


/*[clinic input]
@critical_section
bytearray.take_bytes
n: object = None
Bytes to take, negative indexes from end. None indicates all bytes.
/
Take *n* bytes from the bytearray and return them as a bytes object.
[clinic start generated code]*/

static PyObject *
bytearray_take_bytes_impl(PyByteArrayObject *self, PyObject *n)
/*[clinic end generated code: output=3147fbc0bbbe8d94 input=b15b5172cdc6deda]*/
{
    /* Remove the leading bytes of the bytearray and return them as a bytes
     * object.
     *
     * n is None (take everything) or an index-like integer; a negative value
     * counts back from the end, i.e. len(self) + n bytes are taken.
     *
     * Returns a new reference, or NULL with an exception set:
     *   - TypeError if n is neither None nor index-like,
     *   - IndexError if the resulting count is outside [0, len(self)],
     *   - whatever _canresize() sets when the bytearray cannot be resized,
     *   - the allocation error from PyBytes_FromStringAndSize() or
     *     _PyBytes_Resize().
     */
    Py_ssize_t to_take, original;
    Py_ssize_t size = Py_SIZE(self);
    if (Py_IsNone(n)) {
        to_take = original = size;
    }
    // Integer index, from start (zero, positive) or end (negative).
    else if (_PyIndex_Check(n)) {
        to_take = original = PyNumber_AsSsize_t(n, PyExc_IndexError);
        if (to_take == -1 && PyErr_Occurred()) {
            return NULL;
        }
        if (to_take < 0) {
            to_take += size;
        }
    }
    else {
        PyErr_SetString(PyExc_TypeError, "n must be an integer or None");
        return NULL;
    }

    if (to_take < 0 || to_take > size) {
        // All three arguments are Py_ssize_t, so they need %zd; %d would
        // read them as int.
        PyErr_Format(PyExc_IndexError,
                     "can't take %zd(%zd) outside size %zd",
                     original, to_take, size);
        return NULL;
    }

    // Exports may change the contents; no mutable bytes allowed.
    if (!_canresize(self)) {
        return NULL;
    }

    if (to_take == 0 || size == 0) {
        return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
    }

    // Copy remaining bytes to a new bytes.
    PyObject *remaining = NULL;
    Py_ssize_t remaining_length = size - to_take;
    if (remaining_length > 0) {
        // +1 to copy across the null which always ends a bytearray.
        remaining = PyBytes_FromStringAndSize(self->ob_start + to_take,
                                              remaining_length + 1);
        if (remaining == NULL) {
            return NULL;
        }
    }

    // If the bytes are offset inside the buffer must first align.
    if (self->ob_start != self->ob_bytes) {
        memmove(self->ob_bytes, self->ob_start, to_take);
        self->ob_start = self->ob_bytes;
    }

    // Shrink the existing buffer to exactly the taken bytes; it becomes the
    // result. On failure _PyBytes_Resize() has already freed that buffer and
    // stored NULL in ob_bytes_object, so ob_bytes/ob_start would dangle.
    // Drop the copied tail and fall through so the bytearray is reset to the
    // empty state below instead of pointing at freed memory.
    PyObject *result = NULL;
    if (_PyBytes_Resize(&self->ob_bytes_object, to_take) == -1) {
        Py_CLEAR(remaining);
    }
    else {
        result = self->ob_bytes_object;
    }

    // Point the bytearray towards the buffer with the remaining data.
    self->ob_bytes_object = remaining;
    if (remaining) {
        self->ob_bytes = self->ob_start = PyBytes_AS_STRING(self->ob_bytes_object);
        Py_SET_SIZE(self, remaining_length);
        FT_ATOMIC_STORE_SSIZE_RELAXED(self->ob_alloc, remaining_length + 1);
    }
    else {
        self->ob_bytes = self->ob_start = NULL;
        Py_SET_SIZE(self, 0);
        FT_ATOMIC_STORE_SSIZE_RELAXED(self->ob_alloc, 0);
    }

    // NULL here means the resize failed and its exception is still set.
    return result;
}


/*[clinic input]
@critical_section
bytearray.translate
Expand Down Expand Up @@ -2690,6 +2779,7 @@ static PyMethodDef bytearray_methods[] = {
BYTEARRAY_STARTSWITH_METHODDEF
BYTEARRAY_STRIP_METHODDEF
{"swapcase", bytearray_swapcase, METH_NOARGS, _Py_swapcase__doc__},
BYTEARRAY_TAKE_BYTES_METHODDEF
{"title", bytearray_title, METH_NOARGS, _Py_title__doc__},
BYTEARRAY_TRANSLATE_METHODDEF
{"upper", bytearray_upper, METH_NOARGS, _Py_upper__doc__},
Expand Down
39 changes: 38 additions & 1 deletion Objects/clinic/bytearrayobject.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.