Skip to content

Commit

Permalink
Simplify {Set,Map}.fromDistinct{Asc,Desc}List (#1029)
Browse files Browse the repository at this point in the history
Uses only the Stack, making FromDistinctMonoState unnecessary.
This implementation also allows for quick access to the last element,
which may be used in fromAscListWith, mapKeysWith, etc.
  • Loading branch information
meooow25 authored Aug 31, 2024
1 parent 4af12df commit 41005b5
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 108 deletions.
72 changes: 31 additions & 41 deletions containers/src/Data/Map/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -358,13 +358,12 @@ module Data.Map.Internal (
, link
, link2
, glue
, fromDistinctAscList_linkTop
, fromDistinctAscList_linkAll
, fromDistinctDescList_linkTop
, fromDistinctDescList_linkAll
, ascLinkTop
, ascLinkAll
, descLinkTop
, descLinkAll
, MaybeS(..)
, Identity(..)
, FromDistinctMonoState(..)
, Stack(..)
, foldl'Stack

Expand Down Expand Up @@ -3833,28 +3832,25 @@ fromDescListWithKey f xs
-- > valid (fromDistinctAscList [(3,"b"), (5,"a")]) == True
-- > valid (fromDistinctAscList [(3,"b"), (5,"a"), (5,"b")]) == False

-- For some reason, when 'singleton' is used in fromDistinctAscList or in
-- create, it is not inlined, so we inline it manually.

-- See Note [fromDistinctAscList implementation] in Data.Set.Internal.
fromDistinctAscList :: [(k,a)] -> Map k a
fromDistinctAscList = fromDistinctAscList_linkAll . Foldable.foldl' next (State0 Nada)
fromDistinctAscList = ascLinkAll . Foldable.foldl' next Nada
where
next :: FromDistinctMonoState k a -> (k,a) -> FromDistinctMonoState k a
next (State0 stk) (!kx, x) = fromDistinctAscList_linkTop (Bin 1 kx x Tip Tip) stk
next (State1 l stk) (kx, x) = State0 (Push kx x l stk)
next :: Stack k a -> (k, a) -> Stack k a
next (Push kx x Tip stk) (!ky, y) = ascLinkTop stk 1 (singleton kx x) ky y
next stk (!kx, x) = Push kx x Tip stk
{-# INLINE fromDistinctAscList #-} -- INLINE for fusion

fromDistinctAscList_linkTop :: Map k a -> Stack k a -> FromDistinctMonoState k a
fromDistinctAscList_linkTop r@(Bin rsz _ _ _ _) (Push kx x l@(Bin lsz _ _ _ _) stk)
| rsz == lsz = fromDistinctAscList_linkTop (bin kx x l r) stk
fromDistinctAscList_linkTop l stk = State1 l stk
{-# INLINABLE fromDistinctAscList_linkTop #-}
ascLinkTop :: Stack k a -> Int -> Map k a -> k -> a -> Stack k a
ascLinkTop (Push kx x l@(Bin lsz _ _ _ _) stk) !rsz r ky y
| lsz == rsz = ascLinkTop stk sz (Bin sz kx x l r) ky y
where
sz = lsz + rsz + 1
ascLinkTop stk !_ l kx x = Push kx x l stk

fromDistinctAscList_linkAll :: FromDistinctMonoState k a -> Map k a
fromDistinctAscList_linkAll (State0 stk) = foldl'Stack (\r kx x l -> link kx x l r) Tip stk
fromDistinctAscList_linkAll (State1 r0 stk) = foldl'Stack (\r kx x l -> link kx x l r) r0 stk
{-# INLINABLE fromDistinctAscList_linkAll #-}
ascLinkAll :: Stack k a -> Map k a
ascLinkAll stk = foldl'Stack (\r kx x l -> link kx x l r) Tip stk
{-# INLINABLE ascLinkAll #-}

-- | \(O(n)\). Build a map from a descending list of distinct elements in linear time.
-- /The precondition is not checked./
Expand All @@ -3865,32 +3861,26 @@ fromDistinctAscList_linkAll (State1 r0 stk) = foldl'Stack (\r kx x l -> link kx
--
-- @since 0.5.8

-- For some reason, when 'singleton' is used in fromDistinctDescList or in
-- create, it is not inlined, so we inline it manually.

-- See Note [fromDistinctAscList implementation] in Data.Set.Internal.
fromDistinctDescList :: [(k,a)] -> Map k a
fromDistinctDescList = fromDistinctDescList_linkAll . Foldable.foldl' next (State0 Nada)
fromDistinctDescList = descLinkAll . Foldable.foldl' next Nada
where
next :: FromDistinctMonoState k a -> (k,a) -> FromDistinctMonoState k a
next (State0 stk) (!kx, x) = fromDistinctDescList_linkTop (Bin 1 kx x Tip Tip) stk
next (State1 r stk) (kx, x) = State0 (Push kx x r stk)
next :: Stack k a -> (k, a) -> Stack k a
next (Push ky y Tip stk) (!kx, x) = descLinkTop kx x 1 (singleton ky y) stk
next stk (!ky, y) = Push ky y Tip stk
{-# INLINE fromDistinctDescList #-} -- INLINE for fusion

fromDistinctDescList_linkTop :: Map k a -> Stack k a -> FromDistinctMonoState k a
fromDistinctDescList_linkTop l@(Bin lsz _ _ _ _) (Push kx x r@(Bin rsz _ _ _ _) stk)
| lsz == rsz = fromDistinctDescList_linkTop (bin kx x l r) stk
fromDistinctDescList_linkTop r stk = State1 r stk
{-# INLINABLE fromDistinctDescList_linkTop #-}

fromDistinctDescList_linkAll :: FromDistinctMonoState k a -> Map k a
fromDistinctDescList_linkAll (State0 stk) = foldl'Stack (\l kx x r -> link kx x l r) Tip stk
fromDistinctDescList_linkAll (State1 l0 stk) = foldl'Stack (\l kx x r -> link kx x l r) l0 stk
{-# INLINABLE fromDistinctDescList_linkAll #-}
descLinkTop :: k -> a -> Int -> Map k a -> Stack k a -> Stack k a
descLinkTop kx x !lsz l (Push ky y r@(Bin rsz _ _ _ _) stk)
| lsz == rsz = descLinkTop kx x sz (Bin sz ky y l r) stk
where
sz = lsz + rsz + 1
descLinkTop ky y !_ r stk = Push ky y r stk
{-# INLINABLE descLinkTop #-}

data FromDistinctMonoState k a
= State0 !(Stack k a)
| State1 !(Map k a) !(Stack k a)
descLinkAll :: Stack k a -> Map k a
descLinkAll stk = foldl'Stack (\l kx x r -> link kx x l r) Tip stk
{-# INLINABLE descLinkAll #-}

data Stack k a = Push !k a !(Map k a) !(Stack k a) | Nada

Expand Down
31 changes: 12 additions & 19 deletions containers/src/Data/Map/Strict/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -331,11 +331,10 @@ import Data.Map.Internal
, filterAMissing
, merge
, mergeA
, fromDistinctAscList_linkTop
, fromDistinctAscList_linkAll
, fromDistinctDescList_linkTop
, fromDistinctDescList_linkAll
, FromDistinctMonoState (..)
, ascLinkTop
, ascLinkAll
, descLinkTop
, descLinkAll
, Stack (..)
, (!)
, (!?)
Expand Down Expand Up @@ -1733,16 +1732,13 @@ fromDescListWithKey f xs0 = fromDistinctDescList xs1
-- > valid (fromDistinctAscList [(3,"b"), (5,"a")]) == True
-- > valid (fromDistinctAscList [(3,"b"), (5,"a"), (5,"b")]) == False

-- For some reason, when 'singleton' is used in fromDistinctAscList or in
-- create, it is not inlined, so we inline it manually.

-- See Note [fromDistinctAscList implementation] in Data.Set.Internal.
fromDistinctAscList :: [(k,a)] -> Map k a
fromDistinctAscList = fromDistinctAscList_linkAll . Foldable.foldl' next (State0 Nada)
fromDistinctAscList = ascLinkAll . Foldable.foldl' next Nada
where
next :: FromDistinctMonoState k a -> (k,a) -> FromDistinctMonoState k a
next (State0 stk) (!kx, !x) = fromDistinctAscList_linkTop (Bin 1 kx x Tip Tip) stk
next (State1 l stk) (!kx, !x) = State0 (Push kx x l stk)
next :: Stack k a -> (k, a) -> Stack k a
next (Push kx x Tip stk) (!ky, !y) = ascLinkTop stk 1 (singleton kx x) ky y
next stk (!kx, !x) = Push kx x Tip stk
{-# INLINE fromDistinctAscList #-} -- INLINE for fusion

-- | \(O(n)\). Build a map from a descending list of distinct elements in linear time.
Expand All @@ -1752,14 +1748,11 @@ fromDistinctAscList = fromDistinctAscList_linkAll . Foldable.foldl' next (State0
-- > valid (fromDistinctDescList [(5,"a"), (3,"b")]) == True
-- > valid (fromDistinctDescList [(5,"a"), (3,"b"), (3,"a")]) == False

-- For some reason, when 'singleton' is used in fromDistinctDescList or in
-- create, it is not inlined, so we inline it manually.

-- See Note [fromDistinctAscList implementation] in Data.Set.Internal.
fromDistinctDescList :: [(k,a)] -> Map k a
fromDistinctDescList = fromDistinctDescList_linkAll . Foldable.foldl' next (State0 Nada)
fromDistinctDescList = descLinkAll . Foldable.foldl' next Nada
where
next :: FromDistinctMonoState k a -> (k,a) -> FromDistinctMonoState k a
next (State0 stk) (!kx, !x) = fromDistinctDescList_linkTop (Bin 1 kx x Tip Tip) stk
next (State1 r stk) (!kx, !x) = State0 (Push kx x r stk)
next :: Stack k a -> (k, a) -> Stack k a
next (Push ky y Tip stk) (!kx, !x) = descLinkTop kx x 1 (singleton ky y) stk
next stk (!ky, !y) = Push ky y Tip stk
{-# INLINE fromDistinctDescList #-} -- INLINE for fusion
91 changes: 43 additions & 48 deletions containers/src/Data/Set/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -1208,60 +1208,50 @@ combineEq (x : xs) = combineEq' x xs
-- | \(O(n)\). Build a set from an ascending list of distinct elements in linear time.
-- /The precondition (input list is strictly ascending) is not checked./

-- For some reason, when 'singleton' is used in fromDistinctAscList or in
-- create, it is not inlined, so we inline it manually.

-- See Note [fromDistinctAscList implementation]
fromDistinctAscList :: [a] -> Set a
fromDistinctAscList = fromDistinctAscList_linkAll . Foldable.foldl' next (State0 Nada)
fromDistinctAscList = ascLinkAll . Foldable.foldl' next Nada
where
next :: FromDistinctMonoState a -> a -> FromDistinctMonoState a
next (State0 stk) !x = fromDistinctAscList_linkTop (Bin 1 x Tip Tip) stk
next (State1 l stk) x = State0 (Push x l stk)
next :: Stack a -> a -> Stack a
next (Push x Tip stk) !y = ascLinkTop stk 1 (singleton x) y
next stk !x = Push x Tip stk
{-# INLINE fromDistinctAscList #-} -- INLINE for fusion

fromDistinctAscList_linkTop :: Set a -> Stack a -> FromDistinctMonoState a
fromDistinctAscList_linkTop r@(Bin rsz _ _ _) (Push x l@(Bin lsz _ _ _) stk)
| rsz == lsz = fromDistinctAscList_linkTop (bin x l r) stk
fromDistinctAscList_linkTop l stk = State1 l stk
{-# INLINABLE fromDistinctAscList_linkTop #-}
ascLinkTop :: Stack a -> Int -> Set a -> a -> Stack a
ascLinkTop (Push x l@(Bin lsz _ _ _) stk) !rsz r y
| lsz == rsz = ascLinkTop stk sz (Bin sz x l r) y
where
sz = lsz + rsz + 1
ascLinkTop stk !_ r y = Push y r stk

fromDistinctAscList_linkAll :: FromDistinctMonoState a -> Set a
fromDistinctAscList_linkAll (State0 stk) = foldl'Stack (\r x l -> link x l r) Tip stk
fromDistinctAscList_linkAll (State1 r0 stk) = foldl'Stack (\r x l -> link x l r) r0 stk
{-# INLINABLE fromDistinctAscList_linkAll #-}
ascLinkAll :: Stack a -> Set a
ascLinkAll stk = foldl'Stack (\r x l -> link x l r) Tip stk
{-# INLINABLE ascLinkAll #-}

-- | \(O(n)\). Build a set from a descending list of distinct elements in linear time.
-- /The precondition (input list is strictly descending) is not checked./
--
-- @since 0.5.8

-- For some reason, when 'singleton' is used in fromDistinctDescList or in
-- create, it is not inlined, so we inline it manually.

-- See Note [fromDistinctAscList implementation]
fromDistinctDescList :: [a] -> Set a
fromDistinctDescList = fromDistinctDescList_linkAll . Foldable.foldl' next (State0 Nada)
fromDistinctDescList = descLinkAll . Foldable.foldl' next Nada
where
next :: FromDistinctMonoState a -> a -> FromDistinctMonoState a
next (State0 stk) !x = fromDistinctDescList_linkTop (Bin 1 x Tip Tip) stk
next (State1 r stk) x = State0 (Push x r stk)
next :: Stack a -> a -> Stack a
next (Push y Tip stk) !x = descLinkTop x 1 (singleton y) stk
next stk !y = Push y Tip stk
{-# INLINE fromDistinctDescList #-} -- INLINE for fusion

fromDistinctDescList_linkTop :: Set a -> Stack a -> FromDistinctMonoState a
fromDistinctDescList_linkTop l@(Bin lsz _ _ _) (Push x r@(Bin rsz _ _ _) stk)
| lsz == rsz = fromDistinctDescList_linkTop (bin x l r) stk
fromDistinctDescList_linkTop r stk = State1 r stk
{-# INLINABLE fromDistinctDescList_linkTop #-}

fromDistinctDescList_linkAll :: FromDistinctMonoState a -> Set a
fromDistinctDescList_linkAll (State0 stk) = foldl'Stack (\l x r -> link x l r) Tip stk
fromDistinctDescList_linkAll (State1 l0 stk) = foldl'Stack (\l x r -> link x l r) l0 stk
{-# INLINABLE fromDistinctDescList_linkAll #-}
descLinkTop :: a -> Int -> Set a -> Stack a -> Stack a
descLinkTop x !lsz l (Push y r@(Bin rsz _ _ _) stk)
| lsz == rsz = descLinkTop x sz (Bin sz y l r) stk
where
sz = lsz + rsz + 1
descLinkTop y !_ r stk = Push y r stk

data FromDistinctMonoState a
= State0 !(Stack a)
| State1 !(Set a) !(Stack a)
descLinkAll :: Stack a -> Set a
descLinkAll stk = foldl'Stack (\l x r -> link x l r) Tip stk
{-# INLINABLE descLinkAll #-}

data Stack a = Push !a !(Set a) !(Stack a) | Nada

Expand Down Expand Up @@ -2183,24 +2173,29 @@ validsize t
-- fromDistinctAscList is implemented by building up perfectly balanced trees
-- while we consume elements from the list one by one. A stack of
-- (root, perfectly balanced left branch) pairs is maintained, in increasing
-- order of size from top to bottom.
--
-- When we get an element from the list, we attempt to link it as the right
-- branch with the top (root, perfect left branch) of the stack to create a new
-- perfect tree. We can only do this if the left branch has size 1. If we link
-- it, we get a perfect tree of size 3. We repeat this process, merging with the
-- top of the stack as long as the sizes match. When we can't link any more, the
-- perfect tree we built so far is a potential left branch. The next element
-- we find becomes the root, and we push this new (root, left branch) on the
-- stack.
-- order of size from top to bottom. The stack reflects the binary
-- representation of the total number of elements in it, with every level having
-- a power of 2 number of elements.
--
-- When we get an element from the list, we check the (root, left branch) at the
-- top of the stack.
-- If the tree there is not empty, we push the element with an empty left child
-- on the stack.
-- If the tree is empty, the root is packed into a singleton tree to act as a
-- right branch for trees higher up the stack. It is linked with left branches
-- in the stack, but only when they have equal size. This preserves the
-- perfectly balanced property. When there is a size mismatch, the tree is
-- too small to link. It is pushed on the stack as a left branch with the new
-- element as root, awaiting a right branch which will make it large enough to
-- be linked further.
--
-- When we are out of elements, we link the (root, left branch)s in the stack
-- top to bottom to get the final tree.
--
-- How long does this take? We do O(1) work per element excluding the links.
-- Over n elements, we build trees with at most n nodes total, and each link is
-- done in O(1) using `bin`. The final linking of the stack is done in O(log n)
-- using `link` (proof below). The total time is thus O(n).
-- done in O(1) using `Bin`. The final linking of the stack is done in O(log n)
-- using `link` (proof below). The total time is thus O(n).
--
-- Additionally, the implemention is written using foldl' over the input list,
-- which makes it participate as a good consumer in list fusion.
Expand Down

0 comments on commit 41005b5

Please sign in to comment.