Skip to content

Function _quickSort() can be optimized #6289

@gpersoon

Description

@gpersoon

🧐 Motivation
Function _quickSort() is limited in the number of items that can be sorted, especially for the worst case where the array is already (largely) sorted (in a reverse way).

📝 Details
The limitation is relateed to the stack dept of 1024 and the relatively large usage of stack per recusion (3 parameters passed and 2 variables allocated). This allows for a recurstion dept of only 169.

The function _quickSort() does two recursive calls. In most programming languages that is fine because they support tail recursion, which optimizes stack usage for a recursive call at the end of the function. However Solidity does not do this optimization.

📝 POC

The following POC shows the issue:

  • test_sort1: N1=169 gas=1943416
  • test_sort2: N2=1000 gas=67212880

Setting N1 to 170 results in: [FAIL: EvmError: StackOverflow]

// SPDX-License-Identifier: MIT
pragma solidity ^0.8.28;

import 'forge-std/Test.sol';

contract TestRecursion {

    function gtComparator(uint256 a, uint256 b) internal pure returns (bool) {
        return a > b;
    }
  
    uint constant N1 = 169; // 170 -> StackOverflow with _quickSort1  
    function test_sort1() public view {
        uint256[] memory a1 = new uint256[](N1);
        for (uint i=0;i<N1;i++) {
           a1[i] = 1000 -i ;//% 20;
        }
        uint g1;uint g2;      
        g1 = gasleft();_quickSort1(_begin(a1), _end(a1),gtComparator);g2=gasleft();console.log("1 N1=%d gas=%d", N1,g1-g2);
    }

    uint constant N2 = 1000; 
    function test_sort2() public view {
        uint256[] memory a2 = new uint256[](N2);
        for (uint i=0;i<N2;i++) {
           a2[i] = 10000 -i ;//% 20;  
        }
        uint g1;uint g2;      
        g1 = gasleft();_quickSort2(_begin(a2), _end(a2),gtComparator);g2=gasleft();console.log("2 N2=%d gas=%d", N2,g1-g2);
    }
  

    function _quickSort1(
        uint256 begin,
        uint256 end,
        function(uint256, uint256) pure returns (bool) comp
    ) private pure {
        unchecked {
            if (end - begin < 0x40) return;
            uint256 pivot = _mload(begin);
            uint256 pos = begin;
            for (uint256 it = begin + 0x20; it < end; it += 0x20) {
                if (comp(_mload(it), pivot)) {
                pos += 0x20;
                _swap(pos, it);
                }
            }
            _swap(begin, pos); // Swap pivot into place
            _quickSort1(begin, pos, comp); // Sort the left side of the pivot
            _quickSort1(pos + 0x20, end, comp); // Sort the right side of the pivot
        }
    }

    function _quickSort2(
        uint256 begin,
        uint256 end,
        function(uint256, uint256) pure returns (bool) comp
    ) private pure {
        unchecked {
            while (true) {
                if (end - begin < 0x40) return;
                //console.log(begin,end);
                uint256 pivot = _mload(begin);
                uint256 pos = begin;
                for (uint256 it = begin + 0x20; it < end; it += 0x20) {
                    if (comp(_mload(it), pivot)) {
                      pos += 0x20;
                      _swap(pos, it);
                    }
                }
                _swap(begin, pos); // Swap pivot into place
                if (pos - begin < end - (pos + 0x20) ) {
                    _quickSort2(begin, pos, comp); // Sort the left side of the pivot
                    begin = pos + 0x20; // do another loop without recursion 
                }
                else {
                    _quickSort2(pos + 0x20, end, comp); // Sort the right side of the pivot
                    end = pos; // do another loop without recursion           
                }
            }
        }
    }
    function _begin(uint256[] memory array) private pure returns (uint256 ptr) {
        assembly ('memory-safe') {ptr := add(array, 0x20) }
    }
    function _end(uint256[] memory array) private pure returns (uint256 ptr) {
        unchecked {
            return _begin(array) + array.length * 0x20;
        }
    }
    function _mload(uint256 ptr) private pure returns (uint256 value) {
        assembly { value := mload(ptr) }
    }
    function _swap(uint256 ptr1, uint256 ptr2) private pure {
        assembly {
          let value1 := mload(ptr1)
          let value2 := mload(ptr2)
          mstore(ptr1, value2)
          mstore(ptr2, value1)
        }
    }
}

📝 Suggestion

If the recursion of the largest subset is replaced by a loop, then the total amount of recursions is reduced considerably.

This can be done in the following way:

function _quickSort( uint256 begin, uint256 end, function(uint256, uint256) pure returns (bool) comp) private pure {
    unchecked {
+       while (true) {
            if (end - begin < 0x40) return;
            uint256 pivot = _mload(begin);
            uint256 pos = begin;
            for (uint256 it = begin + 0x20; it < end; it += 0x20) {
                if (comp(_mload(it), pivot)) {
                    pos += 0x20;
                    _swap(pos, it);
                }
            }
            _swap(begin, pos); // Swap pivot into place
+           if (pos - begin < end - (pos + 0x20) ) {
                _quickSort2(begin, pos, comp); // Sort the left side of the pivot
+               begin = pos + 0x20; // do another loop without recursion 
+           } else {
                _quickSort2(pos + 0x20, end, comp); // Sort the right side of the pivot
+               end = pos; // do another loop without recursion           
+           }
+       }
    }
}

Further optimization is also possible by reducing the size of the parameters passed and by reducing the allocated variables in the function. All used variables could be fit in a uint64 and multiple uint64s could be combined in one uint256. However this makes the code more complicated.

Alternatively other sort algoritms could be used: iterative quicksort / three-way partitioning / timsort

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions