Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Speed up parsing of concatenated bytes literals by using the PyBytesWriter API
and a single memory allocation (about 3x faster).
39 changes: 33 additions & 6 deletions Parser/action_helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -1612,19 +1612,46 @@ _build_concatenated_bytes(Parser *p, asdl_expr_seq *strings, int lineno,
Py_ssize_t len = asdl_seq_LEN(strings);
assert(len > 0);

PyObject* res = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);

/* Bytes literals never get a kind, but just for consistency
since they are represented as Constant nodes, we'll mirror
the same behavior as unicode strings for determining the
kind. */
PyObject* kind = asdl_seq_GET(strings, 0)->v.Constant.kind;
PyObject *kind = asdl_seq_GET(strings, 0)->v.Constant.kind;

Py_ssize_t total = 0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bit of a meta question: how much does performance change if the length is not pre-calculated? The pre-calculation + memcpy makes this code quite a bit more complex. If it's only a couple of percent difference (so most of the 3x is kept), the simpler code would be nice for some of these.

for (Py_ssize_t i = 0; i < len; i++) {
expr_ty elem = asdl_seq_GET(strings, i);
PyObject *bytes = elem->v.Constant.value;
Py_ssize_t part = PyBytes_GET_SIZE(bytes);
if (part > PY_SSIZE_T_MAX - total) {
PyErr_NoMemory();
return NULL;
}
total += part;
}

PyBytesWriter *writer = PyBytesWriter_Create(total);
if (writer == NULL) {
return NULL;
}
char *out = PyBytesWriter_GetData(writer);

for (Py_ssize_t i = 0; i < len; i++) {
expr_ty elem = asdl_seq_GET(strings, i);
PyBytes_Concat(&res, elem->v.Constant.value);
PyObject *bytes = elem->v.Constant.value;
Py_ssize_t part = PyBytes_GET_SIZE(bytes);
if (part > 0) {
memcpy(out, PyBytes_AS_STRING(bytes), part);
out += part;
}
}
if (!res || _PyArena_AddPyObject(arena, res) < 0) {
Py_XDECREF(res);

PyObject *res = PyBytesWriter_Finish(writer);
if (res == NULL) {
return NULL;
}
if (_PyArena_AddPyObject(arena, res) < 0) {
Py_DECREF(res);
return NULL;
}
return _PyAST_Constant(res, kind, lineno, col_offset, end_lineno, end_col_offset, p->arena);
Expand Down
Loading