Skip to content

Commit

Permalink
Build Set and Map more efficiently
Browse files Browse the repository at this point in the history
Use "Builder"s to implement the functions:

* Set.fromList
* Set.map
* Map.fromList
* Map.fromListWith
* Map.fromListWithKey
* Map.mapKeys
* Map.mapKeysWith

As a result,

* All fromList functions above are now good consumers in terms of list
  fusion.
* Set.fromList and Map.fromList are now O(n) for input in non-decreasing
  order. They were already O(n) for input in strictly increasing order.
* Map.fromListWith and Map.fromListWithKey are also O(n) for input
  in non-decreasing order.
* Set.map, Map.mapKeys, Map.mapKeysWith are O(n) if the mapping is
  monotonic non-decreasing.
  • Loading branch information
meooow25 committed Sep 16, 2024
1 parent 1395671 commit c234f10
Show file tree
Hide file tree
Showing 3 changed files with 167 additions and 150 deletions.
119 changes: 65 additions & 54 deletions containers/src/Data/Map/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,9 @@ module Data.Map.Internal (
, Identity(..)
, Stack(..)
, foldl'Stack
, MapBuilder(..)
, emptyB
, finishB

-- Used by Map.Merge.Lazy
, mapWhenMissing
Expand All @@ -387,7 +390,6 @@ import Data.Semigroup (Semigroup((<>)))
#endif
import Control.Applicative (Const (..))
import Control.DeepSeq (NFData(rnf))
import Data.Bits (shiftL, shiftR)
import qualified Data.Foldable as Foldable
import Data.Bifoldable
import Utils.Containers.Internal.Prelude hiding
Expand Down Expand Up @@ -3242,7 +3244,7 @@ mapAccumRWithKey f a (Bin sx kx x l r) =
-- > mapKeys (\ _ -> 3) (fromList [(1,"b"), (2,"a"), (3,"d"), (4,"c")]) == singleton 3 "c"

mapKeys :: Ord k2 => (k1->k2) -> Map k1 a -> Map k2 a
mapKeys f = fromList . foldrWithKey (\k x xs -> (f k, x) : xs) []
mapKeys f = finishB . foldlWithKey' (\b kx x -> insertB (f kx) x b) emptyB
#if __GLASGOW_HASKELL__
{-# INLINABLE mapKeys #-}
#endif
Expand All @@ -3261,7 +3263,8 @@ mapKeys f = fromList . foldrWithKey (\k x xs -> (f k, x) : xs) []
-- Also see the performance note on 'fromListWith'.

mapKeysWith :: Ord k2 => (a -> a -> a) -> (k1->k2) -> Map k1 a -> Map k2 a
mapKeysWith c f = fromListWith c . foldrWithKey (\k x xs -> (f k, x) : xs) []
mapKeysWith c f =
finishB . foldlWithKey' (\b kx x -> insertWithB c (f kx) x b) emptyB
#if __GLASGOW_HASKELL__
{-# INLINABLE mapKeysWith #-}
#endif
Expand Down Expand Up @@ -3510,46 +3513,9 @@ instance (Ord k) => GHCExts.IsList (Map k v) where
-- > fromList [(5,"a"), (3,"b"), (5, "c")] == fromList [(5,"c"), (3,"b")]
-- > fromList [(5,"c"), (3,"b"), (5, "a")] == fromList [(5,"a"), (3,"b")]

-- For some reason, when 'singleton' is used in fromList or in
-- create, it is not inlined, so we inline it manually.
fromList :: Ord k => [(k,a)] -> Map k a
fromList [] = Tip
fromList [(kx, x)] = Bin 1 kx x Tip Tip
fromList ((kx0, x0) : xs0) | not_ordered kx0 xs0 = fromList' (Bin 1 kx0 x0 Tip Tip) xs0
| otherwise = go (1::Int) (Bin 1 kx0 x0 Tip Tip) xs0
where
not_ordered _ [] = False
not_ordered kx ((ky,_) : _) = kx >= ky
{-# INLINE not_ordered #-}

fromList' t0 xs = Foldable.foldl' ins t0 xs
where ins t (k,x) = insert k x t

go !_ t [] = t
go _ t [(kx, x)] = insertMax kx x t
go s l xs@((kx, x) : xss) | not_ordered kx xss = fromList' l xs
| otherwise = case create s xss of
(r, ys, []) -> go (s `shiftL` 1) (link kx x l r) ys
(r, _, ys) -> fromList' (link kx x l r) ys

-- The create is returning a triple (tree, xs, ys). Both xs and ys
-- represent not yet processed elements and only one of them can be nonempty.
-- If ys is nonempty, the keys in ys are not ordered with respect to tree
-- and must be inserted using fromList'. Otherwise the keys have been
-- ordered so far.
create !_ [] = (Tip, [], [])
create s xs@(xp : xss)
| s == 1 = case xp of (kx, x) | not_ordered kx xss -> (Bin 1 kx x Tip Tip, [], xss)
| otherwise -> (Bin 1 kx x Tip Tip, xss, [])
| otherwise = case create (s `shiftR` 1) xs of
res@(_, [], _) -> res
(l, [(ky, y)], zs) -> (insertMax ky y l, [], zs)
(l, ys@((ky, y):yss), _) | not_ordered ky yss -> (l, [], ys)
| otherwise -> case create (s `shiftR` 1) yss of
(r, zs, ws) -> (link ky y l r, zs, ws)
#if __GLASGOW_HASKELL__
{-# INLINABLE fromList #-}
#endif
fromList xs = finishB (Foldable.foldl' (\b (kx, x) -> insertB kx x b) emptyB xs)
{-# INLINE fromList #-} -- INLINE for fusion

-- | \(O(n \log n)\). Build a map from a list of key\/value pairs with a combining function. See also 'fromAscListWith'.
--
Expand Down Expand Up @@ -3588,11 +3554,9 @@ fromList ((kx0, x0) : xs0) | not_ordered kx0 xs0 = fromList' (Bin 1 kx0 x0 Tip T
-- > fromListWith (++) $ reverse $ map (\(k, v) -> (k, [v])) someListOfTuples

fromListWith :: Ord k => (a -> a -> a) -> [(k,a)] -> Map k a
fromListWith f xs
= fromListWithKey (\_ x y -> f x y) xs
#if __GLASGOW_HASKELL__
{-# INLINABLE fromListWith #-}
#endif
fromListWith f xs =
finishB (Foldable.foldl' (\b (kx, x) -> insertWithB f kx x b) emptyB xs)
{-# INLINE fromListWith #-} -- INLINE for fusion

-- | \(O(n \log n)\). Build a map from a list of key\/value pairs with a combining function. See also 'fromAscListWithKey'.
--
Expand All @@ -3603,13 +3567,9 @@ fromListWith f xs
-- Also see the performance note on 'fromListWith'.

fromListWithKey :: Ord k => (k -> a -> a -> a) -> [(k,a)] -> Map k a
fromListWithKey f xs
= Foldable.foldl' ins empty xs
where
ins t (k,x) = insertWithKey f k x t
#if __GLASGOW_HASKELL__
{-# INLINABLE fromListWithKey #-}
#endif
fromListWithKey f xs =
finishB (Foldable.foldl' (\b (kx, x) -> insertWithB (f kx) kx x b) emptyB xs)
{-# INLINE fromListWithKey #-} -- INLINE for fusion

-- | \(O(n)\). Convert the map to a list of key\/value pairs. Subject to list fusion.
--
Expand Down Expand Up @@ -4004,6 +3964,57 @@ splitMember k0 m = case go k0 m of

data StrictTriple a b c = StrictTriple !a !b !c

{--------------------------------------------------------------------
MapBuilder
--------------------------------------------------------------------}

-- See Note [SetBuilder] in Data.Set.Internal

data MapBuilder k a
= BAsc !(Stack k a)
| BMap !(Map k a)

-- Empty builder.
emptyB :: MapBuilder k a
emptyB = BAsc Nada

-- Insert a key and value. Replaces the old value if one already exists for
-- the key.
insertB :: Ord k => k -> a -> MapBuilder k a -> MapBuilder k a
insertB !ky y b = case b of
BAsc stk -> case stk of
Push kx x l stk' -> case compare ky kx of
LT -> BMap (insert ky y (ascLinkAll stk))
EQ -> BAsc (Push ky y l stk')
GT -> case l of
Tip -> BAsc (ascLinkTop stk' 1 (singleton kx x) ky y)
Bin{} -> BAsc (Push ky y Tip stk)
Nada -> BAsc (Push ky y Tip Nada)
BMap m -> BMap (insert ky y m)
{-# INLINE insertB #-}

-- Insert a key and value. The new value is combined with the old value if one
-- already exists for the key.
insertWithB
:: Ord k => (a -> a -> a) -> k -> a -> MapBuilder k a -> MapBuilder k a
insertWithB f !ky y b = case b of
BAsc stk -> case stk of
Push kx x l stk' -> case compare ky kx of
LT -> BMap (insertWith f ky y (ascLinkAll stk))
EQ -> BAsc (Push ky (f y x) l stk')
GT -> case l of
Tip -> BAsc (ascLinkTop stk' 1 (singleton kx x) ky y)
Bin{} -> BAsc (Push ky y Tip stk)
Nada -> BAsc (Push ky y Tip Nada)
BMap m -> BMap (insertWith f ky y m)
{-# INLINE insertWithB #-}

-- Finalize the builder into a Map.
finishB :: MapBuilder k a -> Map k a
finishB (BAsc stk) = ascLinkAll stk
finishB (BMap m) = m
{-# INLINABLE finishB #-}

{--------------------------------------------------------------------
Utility functions that maintain the balance properties of the tree.
All constructors assume that all values in [l] < [k] and all values
Expand Down
104 changes: 50 additions & 54 deletions containers/src/Data/Map/Strict/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,9 @@ import Data.Map.Internal
, descLinkTop
, descLinkAll
, Stack (..)
, MapBuilder(..)
, emptyB
, finishB
, (!)
, (!?)
, (\\)
Expand Down Expand Up @@ -375,7 +378,6 @@ import Data.Map.Internal
, foldrWithKey
, foldrWithKey'
, glue
, insertMax
, intersection
, isProperSubmapOf
, isProperSubmapOfBy
Expand Down Expand Up @@ -433,7 +435,6 @@ import qualified Data.Set.Internal as Set
import qualified Data.Map.Internal as L
import Utils.Containers.Internal.StrictPair

import Data.Bits (shiftL, shiftR)
#ifdef __GLASGOW_HASKELL__
import Data.Coerce
#endif
Expand Down Expand Up @@ -1451,7 +1452,8 @@ mapAccumRWithKey f a (Bin sx kx x l r) =
-- Also see the performance note on 'fromListWith'.

mapKeysWith :: Ord k2 => (a -> a -> a) -> (k1->k2) -> Map k1 a -> Map k2 a
mapKeysWith c f = fromListWith c . foldrWithKey (\k x xs -> (f k, x) : xs) []
mapKeysWith c f =
finishB . foldlWithKey' (\b kx x -> insertWithB c (f kx) x b) emptyB
#if __GLASGOW_HASKELL__
{-# INLINABLE mapKeysWith #-}
#endif
Expand Down Expand Up @@ -1492,46 +1494,9 @@ fromArgSet (Set.Bin sz (Arg x v) l r) = v `seq` Bin sz x v (fromArgSet l) (fromA
-- > fromList [(5,"a"), (3,"b"), (5, "c")] == fromList [(5,"c"), (3,"b")]
-- > fromList [(5,"c"), (3,"b"), (5, "a")] == fromList [(5,"a"), (3,"b")]

-- For some reason, when 'singleton' is used in fromList or in
-- create, it is not inlined, so we inline it manually.
fromList :: Ord k => [(k,a)] -> Map k a
fromList [] = Tip
fromList [(kx, x)] = x `seq` Bin 1 kx x Tip Tip
fromList ((kx0, x0) : xs0) | not_ordered kx0 xs0 = x0 `seq` fromList' (Bin 1 kx0 x0 Tip Tip) xs0
| otherwise = x0 `seq` go (1::Int) (Bin 1 kx0 x0 Tip Tip) xs0
where
not_ordered _ [] = False
not_ordered kx ((ky,_) : _) = kx >= ky
{-# INLINE not_ordered #-}

fromList' t0 xs = Foldable.foldl' ins t0 xs
where ins t (k,x) = insert k x t

go !_ t [] = t
go _ t [(kx, x)] = x `seq` insertMax kx x t
go s l xs@((kx, x) : xss) | not_ordered kx xss = fromList' l xs
| otherwise = case create s xss of
(r, ys, []) -> x `seq` go (s `shiftL` 1) (link kx x l r) ys
(r, _, ys) -> x `seq` fromList' (link kx x l r) ys

-- The create is returning a triple (tree, xs, ys). Both xs and ys
-- represent not yet processed elements and only one of them can be nonempty.
-- If ys is nonempty, the keys in ys are not ordered with respect to tree
-- and must be inserted using fromList'. Otherwise the keys have been
-- ordered so far.
create !_ [] = (Tip, [], [])
create s xs@(xp : xss)
| s == 1 = case xp of (kx, x) | not_ordered kx xss -> x `seq` (Bin 1 kx x Tip Tip, [], xss)
| otherwise -> x `seq` (Bin 1 kx x Tip Tip, xss, [])
| otherwise = case create (s `shiftR` 1) xs of
res@(_, [], _) -> res
(l, [(ky, y)], zs) -> y `seq` (insertMax ky y l, [], zs)
(l, ys@((ky, y):yss), _) | not_ordered ky yss -> (l, [], ys)
| otherwise -> case create (s `shiftR` 1) yss of
(r, zs, ws) -> y `seq` (link ky y l r, zs, ws)
#if __GLASGOW_HASKELL__
{-# INLINABLE fromList #-}
#endif
fromList xs = finishB (Foldable.foldl' (\b (kx, x) -> insertB kx x b) emptyB xs)
{-# INLINE fromList #-} -- INLINE for fusion

-- | \(O(n \log n)\). Build a map from a list of key\/value pairs with a combining function. See also 'fromAscListWith'.
--
Expand Down Expand Up @@ -1570,11 +1535,9 @@ fromList ((kx0, x0) : xs0) | not_ordered kx0 xs0 = x0 `seq` fromList' (Bin 1 kx0
-- > fromListWith (++) $ reverse $ map (\(k, v) -> (k, [v])) someListOfTuples

fromListWith :: Ord k => (a -> a -> a) -> [(k,a)] -> Map k a
fromListWith f xs
= fromListWithKey (\_ x y -> f x y) xs
#if __GLASGOW_HASKELL__
{-# INLINABLE fromListWith #-}
#endif
fromListWith f xs =
finishB (Foldable.foldl' (\b (kx, x) -> insertWithB f kx x b) emptyB xs)
{-# INLINE fromListWith #-} -- INLINE for fusion

-- | \(O(n \log n)\). Build a map from a list of key\/value pairs with a combining function. See also 'fromAscListWithKey'.
--
Expand All @@ -1585,13 +1548,9 @@ fromListWith f xs
-- Also see the performance note on 'fromListWith'.

fromListWithKey :: Ord k => (k -> a -> a -> a) -> [(k,a)] -> Map k a
fromListWithKey f xs
= Foldable.foldl' ins empty xs
where
ins t (k,x) = insertWithKey f k x t
#if __GLASGOW_HASKELL__
{-# INLINABLE fromListWithKey #-}
#endif
fromListWithKey f xs =
finishB (Foldable.foldl' (\b (kx, x) -> insertWithB (f kx) kx x b) emptyB xs)
{-# INLINE fromListWithKey #-} -- INLINE for fusion

{--------------------------------------------------------------------
Building trees from ascending/descending lists can be done in linear time.
Expand Down Expand Up @@ -1756,3 +1715,40 @@ fromDistinctDescList = descLinkAll . Foldable.foldl' next Nada
next (Push ky y Tip stk) (!kx, !x) = descLinkTop kx x 1 (singleton ky y) stk
next stk (!ky, !y) = Push ky y Tip stk
{-# INLINE fromDistinctDescList #-} -- INLINE for fusion

{--------------------------------------------------------------------
MapBuilder
--------------------------------------------------------------------}

-- Insert a key and value. Replaces the old value if one already exists for
-- the key. Strict in the value.
insertB :: Ord k => k -> a -> MapBuilder k a -> MapBuilder k a
insertB !ky !y b = case b of
BAsc stk -> case stk of
Push kx x l stk' -> case compare ky kx of
LT -> BMap (insert ky y (ascLinkAll stk))
EQ -> BAsc (Push ky y l stk')
GT -> case l of
Tip -> BAsc (ascLinkTop stk' 1 (singleton kx x) ky y)
Bin{} -> BAsc (Push ky y Tip stk)
Nada -> BAsc (Push ky y Tip Nada)
BMap m -> BMap (insert ky y m)
{-# INLINE insertB #-}

-- Insert a key and value. The new value is combined with the old value if one
-- already exists for the key. Strict in the inserted value.
insertWithB
:: Ord k => (a -> a -> a) -> k -> a -> MapBuilder k a -> MapBuilder k a
insertWithB f !ky y b = case b of
BAsc stk -> case stk of
Push kx x l stk' -> case compare ky kx of
LT -> BMap (insertWith f ky y (ascLinkAll stk))
EQ -> BAsc (push ky (f y x) l stk')
GT -> case l of
Tip -> BAsc (ascLinkTop stk' 1 (singleton kx x) ky y)
Bin{} -> BAsc (push ky y Tip stk)
Nada -> BAsc (push ky y Tip Nada)
BMap m -> BMap (insertWith f ky y m)
where
push kx !x = Push kx x
{-# INLINE insertWithB #-}
Loading

0 comments on commit c234f10

Please sign in to comment.