Skip to content

Commit

Permalink
Optimize IntSet.Bin (#998)
Browse files Browse the repository at this point in the history
* Optimize IntSet.Bin

* Replace the separate Prefix and Mask Int fields in the Bin constructor
  with a single Int field which contains both merged together. This
  reduces the memory required by a Bin from 5 to 4 words, at the cost of
  more computations (which are cheap bitwise ops) being necessary for
  certains operations. This follows a similar change done for IntMap.Bin.

* Benchmarks show that runtimes for most operations remain unchanged or
  decrease by a small amount (<10%). As expected, allocations are
  consistently lower by 11-16% for all set operations that have to
  make O(log n) allocations.

* The functions and types used by both IntSet and IntMap have been moved
  into a IntTreeCommons module.

* IntSet validity: Tip cannot be empty

* Generate large keys in Arbitrary IntSet

* Fix subsetCmp error

* union, intersection, difference tests using Arbitrary IntSet

* Fix prefixOk not checking all Bins
  • Loading branch information
meooow25 authored Jun 4, 2024
1 parent c651094 commit 8562003
Show file tree
Hide file tree
Showing 13 changed files with 592 additions and 630 deletions.
2 changes: 2 additions & 0 deletions containers-tests/benchmarks/LookupGE/LookupGE_IntMap.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
module LookupGE_IntMap where

import Prelude hiding (null)
import Data.IntSet.Internal.IntTreeCommons
(Key, Prefix(..), nomatch, signBranch, left)
import Data.IntMap.Internal

lookupGE1 :: Key -> IntMap a -> Maybe (Key,a)
Expand Down
1 change: 1 addition & 0 deletions containers-tests/containers-tests.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ library
Data.IntMap.Strict.Internal
Data.IntSet
Data.IntSet.Internal
Data.IntSet.Internal.IntTreeCommons
Data.Map
Data.Map.Internal
Data.Map.Internal.Debug
Expand Down
43 changes: 22 additions & 21 deletions containers-tests/tests/IntMapValidity.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ module IntMapValidity

import Data.Bits (finiteBitSize, testBit, xor, (.&.))
import Data.List (intercalate, elemIndex)
import Data.IntSet.Internal.IntTreeCommons (Prefix(..), nomatch)
import Data.IntMap.Internal
import Numeric (showHex)
import Test.Tasty.QuickCheck (Property, counterexample, property, (.&&.))
Expand All @@ -17,7 +18,7 @@ import Test.Tasty.QuickCheck (Property, counterexample, property, (.&&.))
valid :: IntMap a -> Property
valid t =
counterexample "nilNeverChildOfBin" (nilNeverChildOfBin t) .&&.
counterexample "prefixOk" (prefixOk t)
counterexample "prefixesOk" (prefixesOk t)

-- Invariant: Nil is never found as a child of Bin.
nilNeverChildOfBin :: IntMap a -> Bool
Expand All @@ -37,26 +38,26 @@ nilNeverChildOfBin t =
-- * All keys in a Bin start with the Bin's shared prefix.
-- * All keys in the Bin's left child have the Prefix's mask bit unset.
-- * All keys in the Bin's right child have the Prefix's mask bit set.
prefixOk :: IntMap a -> Property
prefixOk t =
case t of
Nil -> property ()
Tip _ _ -> property ()
Bin p l r ->
let px = unPrefix p
m = px .&. (-px)
keysl = keys l
keysr = keys r
debugStr = concat
[ "px=" ++ showIntHex px
, ", keysl=[" ++ intercalate "," (fmap showIntHex keysl) ++ "]"
, ", keysr=[" ++ intercalate "," (fmap showIntHex keysr) ++ "]"
]
in counterexample debugStr $
counterexample "mask bit absent" (px /= 0) .&&.
counterexample "prefix not shared" (all (`hasPrefix` p) (keysl ++ keysr)) .&&.
counterexample "left child, mask found set" (all (\x -> x .&. m == 0) keysl) .&&.
counterexample "right child, mask found unset" (all (\x -> x .&. m /= 0) keysr)
prefixesOk :: IntMap a -> Property
prefixesOk t = case t of
Nil -> property ()
Tip _ _ -> property ()
Bin p l r -> currentOk .&&. prefixesOk l .&&. prefixesOk r
where
px = unPrefix p
m = px .&. (-px)
keysl = keys l
keysr = keys r
debugStr = concat
[ "px=" ++ showIntHex px
, ", keysl=[" ++ intercalate "," (fmap showIntHex keysl) ++ "]"
, ", keysr=[" ++ intercalate "," (fmap showIntHex keysr) ++ "]"
]
currentOk = counterexample debugStr $
counterexample "mask bit absent" (px /= 0) .&&.
counterexample "prefix not shared" (all (`hasPrefix` p) (keysl ++ keysr)) .&&.
counterexample "left child, mask found set" (all (\x -> x .&. m == 0) keysl) .&&.
counterexample "right child, mask found unset" (all (\x -> x .&. m /= 0) keysr)

hasPrefix :: Int -> Prefix -> Bool
hasPrefix i p = not (nomatch i p)
Expand Down
79 changes: 38 additions & 41 deletions containers-tests/tests/IntSetValidity.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@
module IntSetValidity (valid) where

import Data.Bits (xor, (.&.))
import Data.IntSet.Internal.IntTreeCommons (Prefix(..), nomatch)
import Data.IntSet.Internal
import Data.List (intercalate)
import Numeric (showHex)
import Test.Tasty.QuickCheck (Property, counterexample, property, (.&&.))
import Utils.Containers.Internal.BitUtil (bitcount)

Expand All @@ -13,9 +16,7 @@ import Utils.Containers.Internal.BitUtil (bitcount)
valid :: IntSet -> Property
valid t =
counterexample "nilNeverChildOfBin" (nilNeverChildOfBin t) .&&.
counterexample "maskPowerOfTwo" (maskPowerOfTwo t) .&&.
counterexample "commonPrefix" (commonPrefix t) .&&.
counterexample "markRespected" (maskRespected t) .&&.
counterexample "prefixesOk" (prefixesOk t) .&&.
counterexample "tipsValid" (tipsValid t)

-- Invariant: Nil is never found as a child of Bin.
Expand All @@ -24,48 +25,41 @@ nilNeverChildOfBin t =
case t of
Nil -> True
Tip _ _ -> True
Bin _ _ l r -> noNilInSet l && noNilInSet r
Bin _ l r -> noNilInSet l && noNilInSet r
where
noNilInSet t' =
case t' of
Nil -> False
Tip _ _ -> True
Bin _ _ l' r' -> noNilInSet l' && noNilInSet r'
Bin _ l' r' -> noNilInSet l' && noNilInSet r'

-- Invariant: The Mask is a power of 2. It is the largest bit position at which
-- two elements of the set differ.
maskPowerOfTwo :: IntSet -> Bool
maskPowerOfTwo t =
case t of
Nil -> True
Tip _ _ -> True
Bin _ m l r ->
bitcount 0 (fromIntegral m) == 1 && maskPowerOfTwo l && maskPowerOfTwo r

-- Invariant: Prefix is the common high-order bits that all elements share to
-- the left of the Mask bit.
commonPrefix :: IntSet -> Bool
commonPrefix t =
case t of
Nil -> True
Tip _ _ -> True
b@(Bin p _ l r) -> all (sharedPrefix p) (elems b) && commonPrefix l && commonPrefix r
where
sharedPrefix :: Prefix -> Int -> Bool
sharedPrefix p a = p == p .&. a
-- Invariants:
-- * All keys in a Bin start with the Bin's shared prefix.
-- * All keys in the Bin's left child have the Prefix's mask bit unset.
-- * All keys in the Bin's right child have the Prefix's mask bit set.
prefixesOk :: IntSet -> Property
prefixesOk t = case t of
Nil -> property ()
Tip _ _ -> property ()
Bin p l r -> currentOk .&&. prefixesOk l .&&. prefixesOk r
where
px = unPrefix p
m = px .&. (-px)
keysl = elems l
keysr = elems r
debugStr = concat
[ "px=" ++ showIntHex px
, ", keysl=[" ++ intercalate "," (fmap showIntHex keysl) ++ "]"
, ", keysr=[" ++ intercalate "," (fmap showIntHex keysr) ++ "]"
]
currentOk = counterexample debugStr $
counterexample "mask bit absent" (px /= 0) .&&.
counterexample "prefix not shared" (all (`hasPrefix` p) (keysl ++ keysr)) .&&.
counterexample "left child, mask found set" (all (\x -> x .&. m == 0) keysl) .&&.
counterexample "right child, mask found unset" (all (\x -> x .&. m /= 0) keysr)

-- Invariant: In Bin prefix mask left right, left consists of the elements that
-- don't have the mask bit set; right is all the elements that do.
maskRespected :: IntSet -> Bool
maskRespected t =
case t of
Nil -> True
Tip _ _ -> True
Bin _ binMask l r ->
all (\x -> zero x binMask) (elems l) &&
all (\x -> not (zero x binMask)) (elems r) &&
maskRespected l &&
maskRespected r
hasPrefix :: Int -> Prefix -> Bool
hasPrefix i p = not (nomatch i p)

-- Invariant: The Prefix is zero for the last 5 (on 32 bit arches) or 6 bits
-- (on 64 bit arches). The values of the set represented by a tip
Expand All @@ -76,14 +70,17 @@ tipsValid :: IntSet -> Bool
tipsValid t =
case t of
Nil -> True
tip@(Tip p b) -> validTipPrefix p
Bin _ _ l r -> tipsValid l && tipsValid r
tip@(Tip p b) -> validTipPrefix p && b /= 0
Bin _ l r -> tipsValid l && tipsValid r

validTipPrefix :: Prefix -> Bool
validTipPrefix :: Int -> Bool
#if WORD_SIZE_IN_BITS==32
-- Last 5 bits of the prefix must be zero for 32 bit arches.
validTipPrefix p = (0x0000001F .&. p) == 0
#else
-- Last 6 bits of the prefix must be zero for 64 bit arches.
validTipPrefix p = (0x000000000000003F .&. p) == 0
#endif

showIntHex :: Int -> String
showIntHex x = "0x" ++ showHex (fromIntegral x :: Word) ""
2 changes: 1 addition & 1 deletion containers-tests/tests/intmap-properties.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import Data.IntMap.Internal (traverseMaybeWithKey)
import Data.IntMap.Merge.Lazy
#endif
import Data.IntMap.Internal.Debug (showTree)
import Data.IntMap.Internal (Prefix(..))
import Data.IntSet.Internal.IntTreeCommons (Prefix(..), nomatch)
import IntMapValidity (hasPrefix, hasPrefixSimple, valid)

import Control.Applicative (Applicative(..))
Expand Down
58 changes: 20 additions & 38 deletions containers-tests/tests/intset-properties.hs
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,15 @@ main = defaultMain $ testGroup "intset-properties"
, testProperty "prop_UnionInsert" prop_UnionInsert
, testProperty "prop_UnionAssoc" prop_UnionAssoc
, testProperty "prop_UnionComm" prop_UnionComm
, testProperty "prop_Diff" prop_Diff
, testProperty "prop_Int" prop_Int
, testProperty "prop_union" prop_union
, testProperty "prop_difference" prop_difference
, testProperty "prop_intersection" prop_intersection
, testProperty "prop_Ordered" prop_Ordered
, testProperty "prop_List" prop_List
, testProperty "prop_DescList" prop_DescList
, testProperty "prop_AscDescList" prop_AscDescList
, testProperty "prop_fromList" prop_fromList
, testProperty "prop_fromRange" prop_fromRange
, testProperty "prop_MaskPow2" prop_MaskPow2
, testProperty "prop_Prefix" prop_Prefix
, testProperty "prop_LeftRight" prop_LeftRight
, testProperty "prop_isProperSubsetOf" prop_isProperSubsetOf
, testProperty "prop_isProperSubsetOf2" prop_isProperSubsetOf2
, testProperty "prop_isSubsetOf" prop_isSubsetOf
Expand Down Expand Up @@ -113,9 +111,8 @@ test_split = do
Arbitrary, reasonably balanced trees
--------------------------------------------------------------------}
instance Arbitrary IntSet where
arbitrary = do{ xs <- arbitrary
; return (fromList xs)
}
arbitrary = fromList <$> oneof [arbitrary, fmap (fmap getLarge) arbitrary]
shrink = fmap fromList . shrink . toAscList

{--------------------------------------------------------------------
Valid IntMaps
Expand Down Expand Up @@ -232,19 +229,26 @@ prop_UnionComm :: IntSet -> IntSet -> Bool
prop_UnionComm t1 t2
= (union t1 t2 == union t2 t1)

prop_Diff :: [Int] -> [Int] -> Property
prop_Diff xs ys =
case difference (fromList xs) (fromList ys) of
prop_union :: IntSet -> IntSet -> Property
prop_union xs ys =
case union xs ys of
t ->
valid t .&&.
toAscList t === List.sort ((List.\\) (nub xs) (nub ys))
toAscList t === List.nub (List.sort (toAscList xs ++ toAscList ys))

prop_Int :: [Int] -> [Int] -> Property
prop_Int xs ys =
case intersection (fromList xs) (fromList ys) of
prop_difference :: IntSet -> IntSet -> Property
prop_difference xs ys =
case difference xs ys of
t ->
valid t .&&.
toAscList t === List.sort (nub ((List.intersect) (xs) (ys)))
toAscList t === (toAscList xs List.\\ toAscList ys)

prop_intersection :: IntSet -> IntSet -> Property
prop_intersection xs ys =
case intersection xs ys of
t ->
valid t .&&.
toAscList t === (toAscList xs `List.intersect` toAscList ys)

prop_disjoint :: IntSet -> IntSet -> Bool
prop_disjoint a b = a `disjoint` b == null (a `intersection` b)
Expand Down Expand Up @@ -284,28 +288,6 @@ prop_fromRange = forAll (scale (*100) arbitrary) go
go (l,h) = valid t .&&. t === fromAscList [l..h]
where t = fromRange (l,h)

{--------------------------------------------------------------------
Bin invariants
--------------------------------------------------------------------}
powersOf2 :: IntSet
powersOf2 = fromList [2^i | i <- [0..63]]

-- Check the invariant that the mask is a power of 2.
prop_MaskPow2 :: IntSet -> Bool
prop_MaskPow2 (Bin _ msk left right) = member msk powersOf2 && prop_MaskPow2 left && prop_MaskPow2 right
prop_MaskPow2 _ = True

-- Check that the prefix satisfies its invariant.
prop_Prefix :: IntSet -> Bool
prop_Prefix s@(Bin prefix msk left right) = all (\elem -> match elem prefix msk) (toList s) && prop_Prefix left && prop_Prefix right
prop_Prefix _ = True

-- Check that the left elements don't have the mask bit set, and the right
-- ones do.
prop_LeftRight :: IntSet -> Bool
prop_LeftRight (Bin _ msk left right) = and [x .&. msk == 0 | x <- toList left] && and [x .&. msk == msk | x <- toList right]
prop_LeftRight _ = True

{--------------------------------------------------------------------
IntSet operations are like Set operations
--------------------------------------------------------------------}
Expand Down
1 change: 1 addition & 0 deletions containers/containers.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ Library
Data.IntMap.Merge.Lazy
Data.IntMap.Merge.Strict
Data.IntSet.Internal
Data.IntSet.Internal.IntTreeCommons
Data.IntSet
Data.Map
Data.Map.Lazy
Expand Down
Loading

0 comments on commit 8562003

Please sign in to comment.