-
Notifications
You must be signed in to change notification settings - Fork 12.4k
Description
🧐 Motivation
Function _quickSort() is limited in the number of items that can be sorted, especially for the worst case where the array is already (largely) sorted (in a reverse way).
📝 Details
The limitation is relateed to the stack dept of 1024 and the relatively large usage of stack per recusion (3 parameters passed and 2 variables allocated). This allows for a recurstion dept of only 169.
The function _quickSort() does two recursive calls. In most programming languages that is fine because they support tail recursion, which optimizes stack usage for a recursive call at the end of the function. However Solidity does not do this optimization.
📝 POC
The following POC shows the issue:
- test_sort1: N1=169 gas=1943416
- test_sort2: N2=1000 gas=67212880
Setting N1 to 170 results in: [FAIL: EvmError: StackOverflow]
// SPDX-License-Identifier: MIT
pragma solidity ^0.8.28;
import 'forge-std/Test.sol';
contract TestRecursion {
function gtComparator(uint256 a, uint256 b) internal pure returns (bool) {
return a > b;
}
uint constant N1 = 169; // 170 -> StackOverflow with _quickSort1
function test_sort1() public view {
uint256[] memory a1 = new uint256[](N1);
for (uint i=0;i<N1;i++) {
a1[i] = 1000 -i ;//% 20;
}
uint g1;uint g2;
g1 = gasleft();_quickSort1(_begin(a1), _end(a1),gtComparator);g2=gasleft();console.log("1 N1=%d gas=%d", N1,g1-g2);
}
uint constant N2 = 1000;
function test_sort2() public view {
uint256[] memory a2 = new uint256[](N2);
for (uint i=0;i<N2;i++) {
a2[i] = 10000 -i ;//% 20;
}
uint g1;uint g2;
g1 = gasleft();_quickSort2(_begin(a2), _end(a2),gtComparator);g2=gasleft();console.log("2 N2=%d gas=%d", N2,g1-g2);
}
function _quickSort1(
uint256 begin,
uint256 end,
function(uint256, uint256) pure returns (bool) comp
) private pure {
unchecked {
if (end - begin < 0x40) return;
uint256 pivot = _mload(begin);
uint256 pos = begin;
for (uint256 it = begin + 0x20; it < end; it += 0x20) {
if (comp(_mload(it), pivot)) {
pos += 0x20;
_swap(pos, it);
}
}
_swap(begin, pos); // Swap pivot into place
_quickSort1(begin, pos, comp); // Sort the left side of the pivot
_quickSort1(pos + 0x20, end, comp); // Sort the right side of the pivot
}
}
function _quickSort2(
uint256 begin,
uint256 end,
function(uint256, uint256) pure returns (bool) comp
) private pure {
unchecked {
while (true) {
if (end - begin < 0x40) return;
//console.log(begin,end);
uint256 pivot = _mload(begin);
uint256 pos = begin;
for (uint256 it = begin + 0x20; it < end; it += 0x20) {
if (comp(_mload(it), pivot)) {
pos += 0x20;
_swap(pos, it);
}
}
_swap(begin, pos); // Swap pivot into place
if (pos - begin < end - (pos + 0x20) ) {
_quickSort2(begin, pos, comp); // Sort the left side of the pivot
begin = pos + 0x20; // do another loop without recursion
}
else {
_quickSort2(pos + 0x20, end, comp); // Sort the right side of the pivot
end = pos; // do another loop without recursion
}
}
}
}
function _begin(uint256[] memory array) private pure returns (uint256 ptr) {
assembly ('memory-safe') {ptr := add(array, 0x20) }
}
function _end(uint256[] memory array) private pure returns (uint256 ptr) {
unchecked {
return _begin(array) + array.length * 0x20;
}
}
function _mload(uint256 ptr) private pure returns (uint256 value) {
assembly { value := mload(ptr) }
}
function _swap(uint256 ptr1, uint256 ptr2) private pure {
assembly {
let value1 := mload(ptr1)
let value2 := mload(ptr2)
mstore(ptr1, value2)
mstore(ptr2, value1)
}
}
}📝 Suggestion
If the recursion of the largest subset is replaced by a loop, then the total amount of recursions is reduced considerably.
This can be done in the following way:
function _quickSort( uint256 begin, uint256 end, function(uint256, uint256) pure returns (bool) comp) private pure {
unchecked {
+ while (true) {
if (end - begin < 0x40) return;
uint256 pivot = _mload(begin);
uint256 pos = begin;
for (uint256 it = begin + 0x20; it < end; it += 0x20) {
if (comp(_mload(it), pivot)) {
pos += 0x20;
_swap(pos, it);
}
}
_swap(begin, pos); // Swap pivot into place
+ if (pos - begin < end - (pos + 0x20) ) {
_quickSort2(begin, pos, comp); // Sort the left side of the pivot
+ begin = pos + 0x20; // do another loop without recursion
+ } else {
_quickSort2(pos + 0x20, end, comp); // Sort the right side of the pivot
+ end = pos; // do another loop without recursion
+ }
+ }
}
}Further optimization is also possible by reducing the size of the parameters passed and by reducing the allocated variables in the function. All used variables could be fit in a uint64 and multiple uint64s could be combined in one uint256. However this makes the code more complicated.
Alternatively other sort algoritms could be used: iterative quicksort / three-way partitioning / timsort