diff --git a/containers-tests/benchmarks/LookupGE/LookupGE_IntMap.hs b/containers-tests/benchmarks/LookupGE/LookupGE_IntMap.hs index 475d7961c..76db5252c 100644 --- a/containers-tests/benchmarks/LookupGE/LookupGE_IntMap.hs +++ b/containers-tests/benchmarks/LookupGE/LookupGE_IntMap.hs @@ -2,6 +2,8 @@ module LookupGE_IntMap where import Prelude hiding (null) +import Data.IntSet.Internal.IntTreeCommons + (Key, Prefix(..), nomatch, signBranch, left) import Data.IntMap.Internal lookupGE1 :: Key -> IntMap a -> Maybe (Key,a) diff --git a/containers-tests/containers-tests.cabal b/containers-tests/containers-tests.cabal index c226d45e0..06462891b 100644 --- a/containers-tests/containers-tests.cabal +++ b/containers-tests/containers-tests.cabal @@ -96,6 +96,7 @@ library Data.IntMap.Strict.Internal Data.IntSet Data.IntSet.Internal + Data.IntSet.Internal.IntTreeCommons Data.Map Data.Map.Internal Data.Map.Internal.Debug diff --git a/containers-tests/tests/IntMapValidity.hs b/containers-tests/tests/IntMapValidity.hs index 8067d1641..1374406c1 100644 --- a/containers-tests/tests/IntMapValidity.hs +++ b/containers-tests/tests/IntMapValidity.hs @@ -6,6 +6,7 @@ module IntMapValidity import Data.Bits (finiteBitSize, testBit, xor, (.&.)) import Data.List (intercalate, elemIndex) +import Data.IntSet.Internal.IntTreeCommons (Prefix(..), nomatch) import Data.IntMap.Internal import Numeric (showHex) import Test.Tasty.QuickCheck (Property, counterexample, property, (.&&.)) @@ -17,7 +18,7 @@ import Test.Tasty.QuickCheck (Property, counterexample, property, (.&&.)) valid :: IntMap a -> Property valid t = counterexample "nilNeverChildOfBin" (nilNeverChildOfBin t) .&&. - counterexample "prefixOk" (prefixOk t) + counterexample "prefixesOk" (prefixesOk t) -- Invariant: Nil is never found as a child of Bin. nilNeverChildOfBin :: IntMap a -> Bool @@ -37,26 +38,26 @@ nilNeverChildOfBin t = -- * All keys in a Bin start with the Bin's shared prefix. -- * All keys in the Bin's left child have the Prefix's mask bit unset. -- * All keys in the Bin's right child have the Prefix's mask bit set. -prefixOk :: IntMap a -> Property -prefixOk t = - case t of - Nil -> property () - Tip _ _ -> property () - Bin p l r -> - let px = unPrefix p - m = px .&. (-px) - keysl = keys l - keysr = keys r - debugStr = concat - [ "px=" ++ showIntHex px - , ", keysl=[" ++ intercalate "," (fmap showIntHex keysl) ++ "]" - , ", keysr=[" ++ intercalate "," (fmap showIntHex keysr) ++ "]" - ] - in counterexample debugStr $ - counterexample "mask bit absent" (px /= 0) .&&. - counterexample "prefix not shared" (all (`hasPrefix` p) (keysl ++ keysr)) .&&. - counterexample "left child, mask found set" (all (\x -> x .&. m == 0) keysl) .&&. - counterexample "right child, mask found unset" (all (\x -> x .&. m /= 0) keysr) +prefixesOk :: IntMap a -> Property +prefixesOk t = case t of + Nil -> property () + Tip _ _ -> property () + Bin p l r -> currentOk .&&. prefixesOk l .&&. prefixesOk r + where + px = unPrefix p + m = px .&. (-px) + keysl = keys l + keysr = keys r + debugStr = concat + [ "px=" ++ showIntHex px + , ", keysl=[" ++ intercalate "," (fmap showIntHex keysl) ++ "]" + , ", keysr=[" ++ intercalate "," (fmap showIntHex keysr) ++ "]" + ] + currentOk = counterexample debugStr $ + counterexample "mask bit absent" (px /= 0) .&&. + counterexample "prefix not shared" (all (`hasPrefix` p) (keysl ++ keysr)) .&&. + counterexample "left child, mask found set" (all (\x -> x .&. m == 0) keysl) .&&. + counterexample "right child, mask found unset" (all (\x -> x .&. m /= 0) keysr) hasPrefix :: Int -> Prefix -> Bool hasPrefix i p = not (nomatch i p) diff --git a/containers-tests/tests/IntSetValidity.hs b/containers-tests/tests/IntSetValidity.hs index 62e8ca46c..5d341eff2 100644 --- a/containers-tests/tests/IntSetValidity.hs +++ b/containers-tests/tests/IntSetValidity.hs @@ -2,7 +2,10 @@ module IntSetValidity (valid) where import Data.Bits (xor, (.&.)) +import Data.IntSet.Internal.IntTreeCommons (Prefix(..), nomatch) import Data.IntSet.Internal +import Data.List (intercalate) +import Numeric (showHex) import Test.Tasty.QuickCheck (Property, counterexample, property, (.&&.)) import Utils.Containers.Internal.BitUtil (bitcount) @@ -13,9 +16,7 @@ import Utils.Containers.Internal.BitUtil (bitcount) valid :: IntSet -> Property valid t = counterexample "nilNeverChildOfBin" (nilNeverChildOfBin t) .&&. - counterexample "maskPowerOfTwo" (maskPowerOfTwo t) .&&. - counterexample "commonPrefix" (commonPrefix t) .&&. - counterexample "markRespected" (maskRespected t) .&&. + counterexample "prefixesOk" (prefixesOk t) .&&. counterexample "tipsValid" (tipsValid t) -- Invariant: Nil is never found as a child of Bin. @@ -24,48 +25,41 @@ nilNeverChildOfBin t = case t of Nil -> True Tip _ _ -> True - Bin _ _ l r -> noNilInSet l && noNilInSet r + Bin _ l r -> noNilInSet l && noNilInSet r where noNilInSet t' = case t' of Nil -> False Tip _ _ -> True - Bin _ _ l' r' -> noNilInSet l' && noNilInSet r' + Bin _ l' r' -> noNilInSet l' && noNilInSet r' --- Invariant: The Mask is a power of 2. It is the largest bit position at which --- two elements of the set differ. -maskPowerOfTwo :: IntSet -> Bool -maskPowerOfTwo t = - case t of - Nil -> True - Tip _ _ -> True - Bin _ m l r -> - bitcount 0 (fromIntegral m) == 1 && maskPowerOfTwo l && maskPowerOfTwo r - --- Invariant: Prefix is the common high-order bits that all elements share to --- the left of the Mask bit. -commonPrefix :: IntSet -> Bool -commonPrefix t = - case t of - Nil -> True - Tip _ _ -> True - b@(Bin p _ l r) -> all (sharedPrefix p) (elems b) && commonPrefix l && commonPrefix r - where - sharedPrefix :: Prefix -> Int -> Bool - sharedPrefix p a = p == p .&. a +-- Invariants: +-- * All keys in a Bin start with the Bin's shared prefix. +-- * All keys in the Bin's left child have the Prefix's mask bit unset. +-- * All keys in the Bin's right child have the Prefix's mask bit set. +prefixesOk :: IntSet -> Property +prefixesOk t = case t of + Nil -> property () + Tip _ _ -> property () + Bin p l r -> currentOk .&&. prefixesOk l .&&. prefixesOk r + where + px = unPrefix p + m = px .&. (-px) + keysl = elems l + keysr = elems r + debugStr = concat + [ "px=" ++ showIntHex px + , ", keysl=[" ++ intercalate "," (fmap showIntHex keysl) ++ "]" + , ", keysr=[" ++ intercalate "," (fmap showIntHex keysr) ++ "]" + ] + currentOk = counterexample debugStr $ + counterexample "mask bit absent" (px /= 0) .&&. + counterexample "prefix not shared" (all (`hasPrefix` p) (keysl ++ keysr)) .&&. + counterexample "left child, mask found set" (all (\x -> x .&. m == 0) keysl) .&&. + counterexample "right child, mask found unset" (all (\x -> x .&. m /= 0) keysr) --- Invariant: In Bin prefix mask left right, left consists of the elements that --- don't have the mask bit set; right is all the elements that do. -maskRespected :: IntSet -> Bool -maskRespected t = - case t of - Nil -> True - Tip _ _ -> True - Bin _ binMask l r -> - all (\x -> zero x binMask) (elems l) && - all (\x -> not (zero x binMask)) (elems r) && - maskRespected l && - maskRespected r +hasPrefix :: Int -> Prefix -> Bool +hasPrefix i p = not (nomatch i p) -- Invariant: The Prefix is zero for the last 5 (on 32 bit arches) or 6 bits -- (on 64 bit arches). The values of the set represented by a tip @@ -76,10 +70,10 @@ tipsValid :: IntSet -> Bool tipsValid t = case t of Nil -> True - tip@(Tip p b) -> validTipPrefix p - Bin _ _ l r -> tipsValid l && tipsValid r + tip@(Tip p b) -> validTipPrefix p && b /= 0 + Bin _ l r -> tipsValid l && tipsValid r -validTipPrefix :: Prefix -> Bool +validTipPrefix :: Int -> Bool #if WORD_SIZE_IN_BITS==32 -- Last 5 bits of the prefix must be zero for 32 bit arches. validTipPrefix p = (0x0000001F .&. p) == 0 @@ -87,3 +81,6 @@ validTipPrefix p = (0x0000001F .&. p) == 0 -- Last 6 bits of the prefix must be zero for 64 bit arches. validTipPrefix p = (0x000000000000003F .&. p) == 0 #endif + +showIntHex :: Int -> String +showIntHex x = "0x" ++ showHex (fromIntegral x :: Word) "" diff --git a/containers-tests/tests/intmap-properties.hs b/containers-tests/tests/intmap-properties.hs index ebd25bffc..c1fe9261b 100644 --- a/containers-tests/tests/intmap-properties.hs +++ b/containers-tests/tests/intmap-properties.hs @@ -10,7 +10,7 @@ import Data.IntMap.Internal (traverseMaybeWithKey) import Data.IntMap.Merge.Lazy #endif import Data.IntMap.Internal.Debug (showTree) -import Data.IntMap.Internal (Prefix(..)) +import Data.IntSet.Internal.IntTreeCommons (Prefix(..), nomatch) import IntMapValidity (hasPrefix, hasPrefixSimple, valid) import Control.Applicative (Applicative(..)) diff --git a/containers-tests/tests/intset-properties.hs b/containers-tests/tests/intset-properties.hs index a769e6de4..fefb5e87f 100644 --- a/containers-tests/tests/intset-properties.hs +++ b/containers-tests/tests/intset-properties.hs @@ -38,17 +38,15 @@ main = defaultMain $ testGroup "intset-properties" , testProperty "prop_UnionInsert" prop_UnionInsert , testProperty "prop_UnionAssoc" prop_UnionAssoc , testProperty "prop_UnionComm" prop_UnionComm - , testProperty "prop_Diff" prop_Diff - , testProperty "prop_Int" prop_Int + , testProperty "prop_union" prop_union + , testProperty "prop_difference" prop_difference + , testProperty "prop_intersection" prop_intersection , testProperty "prop_Ordered" prop_Ordered , testProperty "prop_List" prop_List , testProperty "prop_DescList" prop_DescList , testProperty "prop_AscDescList" prop_AscDescList , testProperty "prop_fromList" prop_fromList , testProperty "prop_fromRange" prop_fromRange - , testProperty "prop_MaskPow2" prop_MaskPow2 - , testProperty "prop_Prefix" prop_Prefix - , testProperty "prop_LeftRight" prop_LeftRight , testProperty "prop_isProperSubsetOf" prop_isProperSubsetOf , testProperty "prop_isProperSubsetOf2" prop_isProperSubsetOf2 , testProperty "prop_isSubsetOf" prop_isSubsetOf @@ -113,9 +111,8 @@ test_split = do Arbitrary, reasonably balanced trees --------------------------------------------------------------------} instance Arbitrary IntSet where - arbitrary = do{ xs <- arbitrary - ; return (fromList xs) - } + arbitrary = fromList <$> oneof [arbitrary, fmap (fmap getLarge) arbitrary] + shrink = fmap fromList . shrink . toAscList {-------------------------------------------------------------------- Valid IntMaps @@ -232,19 +229,26 @@ prop_UnionComm :: IntSet -> IntSet -> Bool prop_UnionComm t1 t2 = (union t1 t2 == union t2 t1) -prop_Diff :: [Int] -> [Int] -> Property -prop_Diff xs ys = - case difference (fromList xs) (fromList ys) of +prop_union :: IntSet -> IntSet -> Property +prop_union xs ys = + case union xs ys of t -> valid t .&&. - toAscList t === List.sort ((List.\\) (nub xs) (nub ys)) + toAscList t === List.nub (List.sort (toAscList xs ++ toAscList ys)) -prop_Int :: [Int] -> [Int] -> Property -prop_Int xs ys = - case intersection (fromList xs) (fromList ys) of +prop_difference :: IntSet -> IntSet -> Property +prop_difference xs ys = + case difference xs ys of t -> valid t .&&. - toAscList t === List.sort (nub ((List.intersect) (xs) (ys))) + toAscList t === (toAscList xs List.\\ toAscList ys) + +prop_intersection :: IntSet -> IntSet -> Property +prop_intersection xs ys = + case intersection xs ys of + t -> + valid t .&&. + toAscList t === (toAscList xs `List.intersect` toAscList ys) prop_disjoint :: IntSet -> IntSet -> Bool prop_disjoint a b = a `disjoint` b == null (a `intersection` b) @@ -284,28 +288,6 @@ prop_fromRange = forAll (scale (*100) arbitrary) go go (l,h) = valid t .&&. t === fromAscList [l..h] where t = fromRange (l,h) -{-------------------------------------------------------------------- - Bin invariants ---------------------------------------------------------------------} -powersOf2 :: IntSet -powersOf2 = fromList [2^i | i <- [0..63]] - --- Check the invariant that the mask is a power of 2. -prop_MaskPow2 :: IntSet -> Bool -prop_MaskPow2 (Bin _ msk left right) = member msk powersOf2 && prop_MaskPow2 left && prop_MaskPow2 right -prop_MaskPow2 _ = True - --- Check that the prefix satisfies its invariant. -prop_Prefix :: IntSet -> Bool -prop_Prefix s@(Bin prefix msk left right) = all (\elem -> match elem prefix msk) (toList s) && prop_Prefix left && prop_Prefix right -prop_Prefix _ = True - --- Check that the left elements don't have the mask bit set, and the right --- ones do. -prop_LeftRight :: IntSet -> Bool -prop_LeftRight (Bin _ msk left right) = and [x .&. msk == 0 | x <- toList left] && and [x .&. msk == msk | x <- toList right] -prop_LeftRight _ = True - {-------------------------------------------------------------------- IntSet operations are like Set operations --------------------------------------------------------------------} diff --git a/containers/containers.cabal b/containers/containers.cabal index cc76c7a3f..20106611a 100644 --- a/containers/containers.cabal +++ b/containers/containers.cabal @@ -53,6 +53,7 @@ Library Data.IntMap.Merge.Lazy Data.IntMap.Merge.Strict Data.IntSet.Internal + Data.IntSet.Internal.IntTreeCommons Data.IntSet Data.Map Data.Map.Lazy diff --git a/containers/src/Data/IntMap/Internal.hs b/containers/src/Data/IntMap/Internal.hs index 333ffd98c..4c85dbe4e 100644 --- a/containers/src/Data/IntMap/Internal.hs +++ b/containers/src/Data/IntMap/Internal.hs @@ -47,14 +47,6 @@ -- @since 0.5.9 ----------------------------------------------------------------------------- --- [Note: INLINE bit fiddling] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- It is essential that the bit fiddling functions like mask, zero, branchMask --- etc are inlined. If they do not, the memory allocation skyrockets. The GHC --- usually gets it right, but it is disastrous if it does not. Therefore we --- explicitly mark these functions INLINE. - - -- [Note: Local 'go' functions and capturing] -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- Care must be taken when using 'go' function which captures an argument. @@ -76,7 +68,7 @@ module Data.IntMap.Internal ( -- * Map type - IntMap(..), Key -- instance Eq,Show + IntMap(..) -- instance Eq,Show -- * Operators , (!), (!?), (\\) @@ -269,7 +261,7 @@ module Data.IntMap.Internal ( , showTreeWith -- * Internal types - , Prefix(..), Nat + , Nat -- * Utility , natFromInt @@ -280,15 +272,6 @@ module Data.IntMap.Internal ( , bin , binCheckLeft , binCheckRight - , nomatch - , left - , signBranch - , MapMapBranch(..) - , mapMapBranch - , mask - , maskW - , branchMask - , highestBitMask -- * Used by "IntMap.Merge.Lazy" and "IntMap.Merge.Strict" , mapWhenMissing @@ -316,8 +299,18 @@ import Utils.Containers.Internal.Prelude hiding (lookup, map, filter, foldr, foldl, foldl', null) import Prelude () -import Data.IntSet.Internal (Key) import qualified Data.IntSet.Internal as IntSet +import Data.IntSet.Internal.IntTreeCommons + ( Key + , Prefix(..) + , nomatch + , left + , signBranch + , mask + , branchMask + , TreeTreeBranch(..) + , treeTreeBranch + ) import Utils.Containers.Internal.BitUtil import Utils.Containers.Internal.StrictPair @@ -394,10 +387,6 @@ data IntMap a = Bin {-# UNPACK #-} !Prefix -- See Note [Okasaki-Gill] for how the implementation here relates to the one in -- Okasaki and Gill's paper. --- | A @Prefix@ represents some prefix of high-order bits of an @Int@. -newtype Prefix = Prefix { unPrefix :: Int } - deriving (Eq, Lift) - -- Some stuff from "Data.IntSet.Internal", for 'restrictKeys' and -- 'withoutKeys' to use. type IntSetPrefix = Int @@ -762,7 +751,7 @@ disjoint Nil _ = True disjoint _ Nil = True disjoint (Tip kx _) ys = notMember kx ys disjoint xs (Tip ky _) = notMember ky xs -disjoint t1@(Bin p1 l1 r1) t2@(Bin p2 l2 r2) = case mapMapBranch p1 p2 of +disjoint t1@(Bin p1 l1 r1) t2@(Bin p2 l2 r2) = case treeTreeBranch p1 p2 of ABL -> disjoint l1 t2 ABR -> disjoint r1 t2 BAL -> disjoint t1 l2 @@ -1173,22 +1162,14 @@ differenceWithKey f m1 m2 -- -- @since 0.5.8 withoutKeys :: IntMap a -> IntSet.IntSet -> IntMap a -withoutKeys t1@(Bin p1 l1 r1) t2@(IntSet.Bin p2 m2 l2 r2) = case shorterMask p1 m2 of - LT -> difference1 - GT -> difference2 - EQ | p1 == p2' -> bin p1 (withoutKeys l1 l2) (withoutKeys r1 r2) - | otherwise -> t1 +withoutKeys t1@(Bin p1 l1 r1) t2@(IntSet.Bin p2 l2 r2) = case treeTreeBranch p1 p2 of + ABL -> binCheckLeft p1 (withoutKeys l1 t2) r1 + ABR -> binCheckRight p1 l1 (withoutKeys r1 t2) + BAL -> withoutKeys t1 l2 + BAR -> withoutKeys t1 r2 + EQL -> bin p1 (withoutKeys l1 l2) (withoutKeys r1 r2) + NOM -> t1 where - px1 = unPrefix p1 - p2' = Prefix (p2 .|. m2) - difference1 - | nomatch p2 p1 = t1 - | left p2 p1 = binCheckLeft p1 (withoutKeys l1 t2) r1 - | otherwise = binCheckRight p1 l1 (withoutKeys r1 t2) - difference2 - | nomatchMask px1 p2 m2 = t1 - | left px1 p2' = withoutKeys t1 l2 - | otherwise = withoutKeys t1 r2 withoutKeys t1@(Bin p1 _ _) (IntSet.Tip p2 bm2) = let px1 = unPrefix p1 minbit = bitmapOf (px1 .&. (px1-1)) @@ -1254,22 +1235,13 @@ intersection m1 m2 -- -- @since 0.5.8 restrictKeys :: IntMap a -> IntSet.IntSet -> IntMap a -restrictKeys t1@(Bin p1 l1 r1) t2@(IntSet.Bin p2 m2 l2 r2) = case shorterMask p1 m2 of - LT -> intersection1 - GT -> intersection2 - EQ | p1 == p2' -> bin p1 (restrictKeys l1 l2) (restrictKeys r1 r2) - | otherwise -> Nil - where - px1 = unPrefix p1 - p2' = Prefix (p2 .|. m2) - intersection1 - | nomatch p2 p1 = Nil - | left p2 p1 = restrictKeys l1 t2 - | otherwise = restrictKeys r1 t2 - intersection2 - | nomatchMask px1 p2 m2 = Nil - | left px1 p2' = restrictKeys t1 l2 - | otherwise = restrictKeys t1 r2 +restrictKeys t1@(Bin p1 l1 r1) t2@(IntSet.Bin p2 l2 r2) = case treeTreeBranch p1 p2 of + ABL -> restrictKeys l1 t2 + ABR -> restrictKeys r1 t2 + BAL -> restrictKeys t1 l2 + BAR -> restrictKeys t1 r2 + EQL -> bin p1 (restrictKeys l1 l2) (restrictKeys r1 r2) + NOM -> Nil restrictKeys t1@(Bin p1 _ _) (IntSet.Tip p2 bm2) = let px1 = unPrefix p1 minbit = bitmapOf (px1 .&. (px1-1)) @@ -1396,7 +1368,7 @@ mergeWithKey' :: (Prefix -> IntMap c -> IntMap c -> IntMap c) -> IntMap a -> IntMap b -> IntMap c mergeWithKey' bin' f g1 g2 = go where - go t1@(Bin p1 l1 r1) t2@(Bin p2 l2 r2) = case mapMapBranch p1 p2 of + go t1@(Bin p1 l1 r1) t2@(Bin p2 l2 r2) = case treeTreeBranch p1 p2 of ABL -> bin' p1 (go l1 t2) (g1 r1) ABR -> bin' p1 (g1 l1) (go r1 t2) BAL -> bin' p2 (go t1 l2) (g2 r2) @@ -2084,7 +2056,7 @@ mergeA merge1 (Tip k1 x1) = mergeTips k1 x1 k2 x2 merge1 Nil = subsingletonBy g2k k2 x2 - go t1@(Bin p1 l1 r1) t2@(Bin p2 l2 r2) = case mapMapBranch p1 p2 of + go t1@(Bin p1 l1 r1) t2@(Bin p2 l2 r2) = case treeTreeBranch p1 p2 of ABL -> binA p1 (go l1 t2) (g1t r1) ABR -> binA p1 (g1t l1) (go r1 t2) BAL -> binA p2 (go t1 l2) (g2t r2) @@ -2364,7 +2336,7 @@ isProperSubmapOfBy predicate t1 t2 _ -> False submapCmp :: (a -> b -> Bool) -> IntMap a -> IntMap b -> Ordering -submapCmp predicate t1@(Bin p1 l1 r1) (Bin p2 l2 r2) = case mapMapBranch p1 p2 of +submapCmp predicate t1@(Bin p1 l1 r1) (Bin p2 l2 r2) = case treeTreeBranch p1 p2 of ABL -> GT ABR -> GT BAL -> submapCmp predicate t1 l2 @@ -2412,7 +2384,7 @@ isSubmapOf m1 m2 > isSubmapOfBy (==) (fromList [(1,1),(2,2)]) (fromList [(1,1)]) -} isSubmapOfBy :: (a -> b -> Bool) -> IntMap a -> IntMap b -> Bool -isSubmapOfBy predicate t1@(Bin p1 l1 r1) (Bin p2 l2 r2) = case mapMapBranch p1 p2 of +isSubmapOfBy predicate t1@(Bin p1 l1 r1) (Bin p2 l2 r2) = case treeTreeBranch p1 p2 of ABL -> False ABR -> False BAL -> isSubmapOfBy predicate t1 l2 @@ -3101,8 +3073,7 @@ keysSet Nil = IntSet.Nil keysSet (Tip kx _) = IntSet.singleton kx keysSet (Bin p l r) | unPrefix p .&. IntSet.suffixBitMask == 0 - , px <- unPrefix p, m <- px .&. (-px) - = IntSet.Bin (px `xor` m) m (keysSet l) (keysSet r) + = IntSet.Bin p (keysSet l) (keysSet r) | otherwise = IntSet.Tip (unPrefix p .&. IntSet.prefixBitMask) (computeBm (computeBm 0 l) r) where computeBm !acc (Bin _ l' r') = computeBm (computeBm acc l') r' @@ -3117,7 +3088,7 @@ keysSet (Bin p l r) fromSet :: (Key -> a) -> IntSet.IntSet -> IntMap a fromSet _ IntSet.Nil = Nil -fromSet f (IntSet.Bin p m l r) = Bin (Prefix (p .|. m)) (fromSet f l) (fromSet f r) +fromSet f (IntSet.Bin p l r) = Bin p (fromSet f l) (fromSet f r) fromSet f (IntSet.Tip kx bm) = buildTree f kx bm (IntSet.suffixBitMask + 1) where -- This is slightly complicated, as we to convert the dense @@ -3540,134 +3511,6 @@ binCheckRight _ l Nil = l binCheckRight p l r = Bin p l r {-# INLINE binCheckRight #-} -{-------------------------------------------------------------------- - Branching ---------------------------------------------------------------------} - --- | A @MapMapBranch@ is returned by 'mapMapBranch' to indicate how two @Bin@s --- relate to each other. --- --- Consider that @A@ and @B@ are the @Bin@s whose @Prefix@es are given to --- @mapMapBranch@ as the first and second arguments respectively. -data MapMapBranch - = ABL -- ^ A contains B in the left child - | ABR -- ^ A contains B in the right child - | BAL -- ^ B contains A in the left child - | BAR -- ^ B contains A in the right child - | EQL -- ^ A and B have equal prefixes - | NOM -- ^ A and B have prefixes that do not match - --- | Calculates how two @Bin@s relate to each other by comparing their --- @Prefix@es. - --- Notes: --- * pw .|. (pw-1) sets every bit below the mask bit to 1. This is the greatest --- key the Bin can have. --- * pw .&. (pw-1) sets the mask bit and every bit below it to 0. This is the --- smallest key the Bin can have. --- --- First, we compare the prefixes to each other. Then we compare a prefix --- against the greatest/smallest keys the other prefix's Bin could have. This is --- enough to determine how the two Bins relate to each other. The conditions can --- be stated as: --- --- * If pw1 from Bin A is less than pw2 from Bin B, and pw2 is <= the greatest --- key of Bin A, then Bin A contains Bin B in its right child. --- * ...and so on - -mapMapBranch :: Prefix -> Prefix -> MapMapBranch -mapMapBranch p1 p2 = case compare pw1 pw2 of - LT | pw2 <= greatest pw1 -> ABR - | smallest pw2 <= pw1 -> BAL - | otherwise -> NOM - GT | pw1 <= greatest pw2 -> BAR - | smallest pw1 <= pw2 -> ABL - | otherwise -> NOM - EQ -> EQL - where - pw1 = natFromInt (unPrefix p1) - pw2 = natFromInt (unPrefix p2) - greatest pw = pw .|. (pw-1) - smallest pw = pw .&. (pw-1) -{-# INLINE mapMapBranch #-} - -{-------------------------------------------------------------------- - Endian independent bit twiddling ---------------------------------------------------------------------} - --- | Does the key @i@ differ from the prefix @p@ before getting to --- the switching bit @m@? -nomatchMask :: Int -> Int -> Int -> Bool -nomatchMask i p m - = (mask i m) /= p -{-# INLINE nomatchMask #-} - --- | Whether the @Int@ does not start with the given @Prefix@. --- --- An @Int@ starts with a @Prefix@ if it shares the high bits with the internal --- @Int@ value of the @Prefix@ up to the mask bit. --- --- @nomatch@ is usually used to determine whether a key belongs in a @Bin@, --- since all keys in a @Bin@ share a @Prefix@. - --- See also: Note [IntMap structure and invariants] -nomatch :: Int -> Prefix -> Bool -nomatch i p = (i `xor` px) .&. prefixMask /= 0 - where - px = unPrefix p - prefixMask = px `xor` (-px) -{-# INLINE nomatch #-} - --- | Whether the @Int@ is to the left of the split created by a @Bin@ with this --- @Prefix@. --- --- This does not imply that the @Int@ belongs in this @Bin@. That fact is --- usually determined first using @nomatch@. -left :: Int -> Prefix -> Bool -left i p = natFromInt i < natFromInt (unPrefix p) -{-# INLINE left #-} - --- | Whether this @Prefix@ splits a @Bin@ at the sign bit. --- --- This can only be True at the top level. --- If it is true, the left child contains non-negative keys and the right child --- contains negative keys. -signBranch :: Prefix -> Bool -signBranch p = unPrefix p == (minBound :: Int) -{-# INLINE signBranch #-} - --- | The prefix of key @i@ up to (but not including) the switching --- bit @m@. -mask :: Key -> Int -> Int -mask i m - = maskW (natFromInt i) (natFromInt m) -{-# INLINE mask #-} - - -{-------------------------------------------------------------------- - Big endian operations ---------------------------------------------------------------------} - --- | The prefix of key @i@ up to (but not including) the switching --- bit @m@. -maskW :: Nat -> Nat -> Int -maskW i m - = intFromNat (i .&. ((-m) `xor` m)) -{-# INLINE maskW #-} - --- | Whether the left prefix is shorter than the prefix length indicated by the --- right mask. -shorterMask :: Prefix -> Int -> Ordering -shorterMask p1 m2 = natFromInt m2 `compare` natFromInt (px1 .&. (-px1)) - where px1 = unPrefix p1 -{-# INLINE shorterMask #-} - --- | The first switching bit where the two prefixes disagree. -branchMask :: Int -> Int -> Int -branchMask p1 p2 - = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2)) -{-# INLINE branchMask #-} - {-------------------------------------------------------------------- Utilities --------------------------------------------------------------------} diff --git a/containers/src/Data/IntMap/Lazy.hs b/containers/src/Data/IntMap/Lazy.hs index 6158f87c8..9bc267be5 100644 --- a/containers/src/Data/IntMap/Lazy.hs +++ b/containers/src/Data/IntMap/Lazy.hs @@ -240,6 +240,7 @@ module Data.IntMap.Lazy ( #endif ) where +import Data.IntSet.Internal.IntTreeCommons (Key) import Data.IntMap.Internal as IM hiding (showTree, showTreeWith) #ifdef __GLASGOW_HASKELL__ import Data.IntMap.Internal.DeprecatedDebug diff --git a/containers/src/Data/IntMap/Strict/Internal.hs b/containers/src/Data/IntMap/Strict/Internal.hs index 73716ae7a..0726d6d47 100644 --- a/containers/src/Data/IntMap/Strict/Internal.hs +++ b/containers/src/Data/IntMap/Strict/Internal.hs @@ -265,14 +265,10 @@ import Prelude () import Data.Bits import qualified Data.IntMap.Internal as L +import Data.IntSet.Internal.IntTreeCommons + (Key, Prefix(..), nomatch, left, signBranch, mask, branchMask) import Data.IntMap.Internal ( IntMap (..) - , Key - , Prefix(..) - , mask - , branchMask - , nomatch - , left , natFromInt , intFromNat , bin @@ -281,7 +277,6 @@ import Data.IntMap.Internal , link , linkKey , linkWithMask - , signBranch , (\\) , (!) @@ -1076,7 +1071,7 @@ mapEitherWithKey f0 t0 = toPair $ go f0 t0 fromSet :: (Key -> a) -> IntSet.IntSet -> IntMap a fromSet _ IntSet.Nil = Nil -fromSet f (IntSet.Bin p m l r) = Bin (Prefix (p .|. m)) (fromSet f l) (fromSet f r) +fromSet f (IntSet.Bin p l r) = Bin p (fromSet f l) (fromSet f r) fromSet f (IntSet.Tip kx bm) = buildTree f kx bm (IntSet.suffixBitMask + 1) where -- This is slightly complicated, as we to convert the dense -- representation of IntSet into tree representation of IntMap. diff --git a/containers/src/Data/IntSet.hs b/containers/src/Data/IntSet.hs index 7d49130da..74923b409 100644 --- a/containers/src/Data/IntSet.hs +++ b/containers/src/Data/IntSet.hs @@ -155,13 +155,9 @@ module Data.IntSet ( -- * Debugging , showTree , showTreeWith - -#if defined(TESTING) - -- * Internals - , match -#endif ) where +import Data.IntSet.Internal.IntTreeCommons (Key) import Data.IntSet.Internal as IS -- $strictness diff --git a/containers/src/Data/IntSet/Internal.hs b/containers/src/Data/IntSet/Internal.hs index 40bd9bb23..40641bb1c 100644 --- a/containers/src/Data/IntSet/Internal.hs +++ b/containers/src/Data/IntSet/Internal.hs @@ -73,14 +73,6 @@ -- @since 0.5.9 ----------------------------------------------------------------------------- --- [Note: INLINE bit fiddling] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- It is essential that the bit fiddling functions like mask, zero, branchMask --- etc are inlined. If they do not, the memory allocation skyrockets. The GHC --- usually gets it right, but it is disastrous if it does not. Therefore we --- explicitly mark these functions INLINE. - - -- [Note: Local 'go' functions and capturing] -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- Care must be taken when using 'go' function which captures an argument. @@ -101,8 +93,8 @@ module Data.IntSet.Internal ( -- * Set type - IntSet(..), Key -- instance Eq,Show - , Prefix, Mask, BitMap + IntSet(..) -- instance Eq,Show + , BitMap -- * Operators , (\\) @@ -187,11 +179,9 @@ module Data.IntSet.Internal ( , showTreeWith -- * Internals - , match , suffixBitMask , prefixBitMask , bitmapOf - , zero ) where import Control.Applicative (Const(..)) @@ -210,6 +200,17 @@ import Prelude () import Utils.Containers.Internal.BitUtil import Utils.Containers.Internal.StrictPair +import Data.IntSet.Internal.IntTreeCommons + ( Key + , Prefix(..) + , nomatch + , left + , signBranch + , mask + , branchMask + , TreeTreeBranch(..) + , treeTreeBranch + ) #if __GLASGOW_HASKELL__ import Data.Data (Data(..), Constr, mkConstr, constrIndex, DataType, mkDataType) @@ -257,29 +258,57 @@ m1 \\ m2 = difference m1 m2 -- | A set of integers. -- See Note: Order of constructors -data IntSet = Bin {-# UNPACK #-} !Prefix {-# UNPACK #-} !Mask !IntSet !IntSet --- Invariant: Nil is never found as a child of Bin. --- Invariant: The Mask is a power of 2. It is the largest bit position at which --- two elements of the set differ. --- Invariant: Prefix is the common high-order bits that all elements share to --- the left of the Mask bit. --- Invariant: In Bin prefix mask left right, left consists of the elements that --- don't have the mask bit set; right is all the elements that do. - | Tip {-# UNPACK #-} !Prefix {-# UNPACK #-} !BitMap --- Invariant: The Prefix is zero for the last 5 (on 32 bit arches) or 6 bits --- (on 64 bit arches). The values of the set represented by a tip --- are the prefix plus the indices of the set bits in the bit map. +data IntSet = Bin {-# UNPACK #-} !Prefix + !IntSet + !IntSet + | Tip {-# UNPACK #-} !Int + {-# UNPACK #-} !BitMap | Nil --- A number stored in a set is stored as --- * Prefix (all but last 5-6 bits) and --- * BitMap (last 5-6 bits stored as a bitmask) --- Last 5-6 bits are called a Suffix. - -type Prefix = Int -type Mask = Int type BitMap = Word -type Key = Int + +-- +-- Note [IntSet structure and invariants] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- * Nil is never found as a child of Bin. +-- +-- * The Prefix of a Bin indicates the common high-order bits that all keys in +-- the Bin share. +-- +-- * The least significant set bit of the Int value of a Prefix is called the +-- mask bit. +-- +-- * All the bits to the left of the mask bit are called the shared prefix. All +-- keys stored in the Bin begin with the shared prefix. +-- +-- * All keys in the left child of the Bin have the mask bit unset, and all keys +-- in the right child have the mask bit set. It follows that +-- +-- 1. The Int value of the Prefix of a Bin is the smallest key that can be +-- present in the right child of the Bin. +-- +-- 2. All keys in the right child of a Bin are greater than keys in the +-- left child, with one exceptional situation. If the Bin separates +-- negative and non-negative keys, the mask bit is the sign bit and the +-- left child stores the non-negative keys while the right child stores the +-- negative keys. +-- +-- * All bits to the right of the mask bit are set to 0 in a Prefix. +-- +-- * The shared prefix of a Bin is never longer than +-- (WORD_SIZE - lg(WORD_SIZE) - 1) bits. +-- +-- * In the context of a Tip, the highest (WORD_SIZE - lg(WORD_SIZE)) bits of +-- a key are called "prefix" and the lowest lg(WORD_SIZE) bits are called +-- "suffix". In Tip kx bm, kx is the shared prefix and bm is a bitmask of the +-- suffixes of the keys. In other words, the keys of Tip kx bm are (kx .|. i) +-- for every set bit i in bm. +-- +-- * In Tip kx _, the lowest lg(WORD_SIZE) bits of kx are set to 0. +-- +-- * In Tip _ bm, bm is never 0. +-- #ifdef __GLASGOW_HASKELL__ -- | @since 0.6.6 @@ -334,7 +363,7 @@ null _ = False size :: IntSet -> Int size = go 0 where - go !acc (Bin _ _ l r) = go (go acc l) r + go !acc (Bin _ l r) = go (go acc l) r go acc (Tip _ bm) = acc + bitcount 0 bm go acc Nil = acc @@ -344,10 +373,10 @@ size = go 0 member :: Key -> IntSet -> Bool member !x = go where - go (Bin p m l r) - | nomatch x p m = False - | zero x m = go l - | otherwise = go r + go (Bin p l r) + | nomatch x p = False + | left x p = go l + | otherwise = go r go (Tip y bm) = prefixOf x == y && bitmapOf x .&. bm /= 0 go Nil = False @@ -363,12 +392,12 @@ notMember k = not . member k -- See Note: Local 'go' functions and capturing. lookupLT :: Key -> IntSet -> Maybe Key lookupLT !x t = case t of - Bin _ m l r | m < 0 -> if x >= 0 then go r l else go Nil r + Bin p l r | signBranch p -> if x >= 0 then go r l else go Nil r _ -> go Nil t where - go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMax def else unsafeFindMax r - | zero x m = go def l - | otherwise = go l r + go def (Bin p l r) | nomatch x p = if x < unPrefix p then unsafeFindMax def else unsafeFindMax r + | left x p = go def l + | otherwise = go l r go def (Tip kx bm) | prefixOf x > kx = Just $ kx + highestBitSet bm | prefixOf x == kx && maskLT /= 0 = Just $ kx + highestBitSet maskLT | otherwise = unsafeFindMax def @@ -384,12 +413,12 @@ lookupLT !x t = case t of -- See Note: Local 'go' functions and capturing. lookupGT :: Key -> IntSet -> Maybe Key lookupGT !x t = case t of - Bin _ m l r | m < 0 -> if x >= 0 then go Nil l else go l r + Bin p l r | signBranch p -> if x >= 0 then go Nil l else go l r _ -> go Nil t where - go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMin l else unsafeFindMin def - | zero x m = go r l - | otherwise = go def r + go def (Bin p l r) | nomatch x p = if x < unPrefix p then unsafeFindMin l else unsafeFindMin def + | left x p = go r l + | otherwise = go def r go def (Tip kx bm) | prefixOf x < kx = Just $ kx + lowestBitSet bm | prefixOf x == kx && maskGT /= 0 = Just $ kx + lowestBitSet maskGT | otherwise = unsafeFindMin def @@ -406,12 +435,12 @@ lookupGT !x t = case t of -- See Note: Local 'go' functions and capturing. lookupLE :: Key -> IntSet -> Maybe Key lookupLE !x t = case t of - Bin _ m l r | m < 0 -> if x >= 0 then go r l else go Nil r + Bin p l r | signBranch p -> if x >= 0 then go r l else go Nil r _ -> go Nil t where - go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMax def else unsafeFindMax r - | zero x m = go def l - | otherwise = go l r + go def (Bin p l r) | nomatch x p = if x < unPrefix p then unsafeFindMax def else unsafeFindMax r + | left x p = go def l + | otherwise = go l r go def (Tip kx bm) | prefixOf x > kx = Just $ kx + highestBitSet bm | prefixOf x == kx && maskLE /= 0 = Just $ kx + highestBitSet maskLE | otherwise = unsafeFindMax def @@ -428,12 +457,12 @@ lookupLE !x t = case t of -- See Note: Local 'go' functions and capturing. lookupGE :: Key -> IntSet -> Maybe Key lookupGE !x t = case t of - Bin _ m l r | m < 0 -> if x >= 0 then go Nil l else go l r + Bin p l r | signBranch p -> if x >= 0 then go Nil l else go l r _ -> go Nil t where - go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMin l else unsafeFindMin def - | zero x m = go r l - | otherwise = go def r + go def (Bin p l r) | nomatch x p = if x < unPrefix p then unsafeFindMin l else unsafeFindMin def + | left x p = go r l + | otherwise = go def r go def (Tip kx bm) | prefixOf x < kx = Just $ kx + lowestBitSet bm | prefixOf x == kx && maskGE /= 0 = Just $ kx + lowestBitSet maskGE | otherwise = unsafeFindMin def @@ -447,14 +476,14 @@ lookupGE !x t = case t of unsafeFindMin :: IntSet -> Maybe Key unsafeFindMin Nil = Nothing unsafeFindMin (Tip kx bm) = Just $ kx + lowestBitSet bm -unsafeFindMin (Bin _ _ l _) = unsafeFindMin l +unsafeFindMin (Bin _ l _) = unsafeFindMin l -- Helper function for lookupLE and lookupLT. It assumes that if a Bin node is -- given, it has m > 0. unsafeFindMax :: IntSet -> Maybe Key unsafeFindMax Nil = Nothing unsafeFindMax (Tip kx bm) = Just $ kx + highestBitSet bm -unsafeFindMax (Bin _ _ _ r) = unsafeFindMax r +unsafeFindMax (Bin _ _ r) = unsafeFindMax r {-------------------------------------------------------------------- Construction @@ -480,11 +509,11 @@ insert :: Key -> IntSet -> IntSet insert !x = insertBM (prefixOf x) (bitmapOf x) -- Helper function for insert and union. -insertBM :: Prefix -> BitMap -> IntSet -> IntSet -insertBM !kx !bm t@(Bin p m l r) - | nomatch kx p m = link kx (Tip kx bm) p t - | zero kx m = Bin p m (insertBM kx bm l) r - | otherwise = Bin p m l (insertBM kx bm r) +insertBM :: Int -> BitMap -> IntSet -> IntSet +insertBM !kx !bm t@(Bin p l r) + | nomatch kx p = linkKey kx (Tip kx bm) p t + | left kx p = Bin p (insertBM kx bm l) r + | otherwise = Bin p l (insertBM kx bm r) insertBM kx bm t@(Tip kx' bm') | kx' == kx = Tip kx' (bm .|. bm') | otherwise = link kx (Tip kx bm) kx' t @@ -497,11 +526,11 @@ delete !x = deleteBM (prefixOf x) (bitmapOf x) -- Deletes all values mentioned in the BitMap from the set. -- Helper function for delete and difference. -deleteBM :: Prefix -> BitMap -> IntSet -> IntSet -deleteBM !kx !bm t@(Bin p m l r) - | nomatch kx p m = t - | zero kx m = bin p m (deleteBM kx bm l) r - | otherwise = bin p m l (deleteBM kx bm r) +deleteBM :: Int -> BitMap -> IntSet -> IntSet +deleteBM !kx !bm t@(Bin p l r) + | nomatch kx p = t + | left kx p = bin p (deleteBM kx bm l) r + | otherwise = bin p l (deleteBM kx bm r) deleteBM kx bm t@(Tip kx' bm') | kx' == kx = tip kx (bm' .&. complement bm) | otherwise = t @@ -553,22 +582,15 @@ unions xs -- | \(O(n+m)\). The union of two sets. union :: IntSet -> IntSet -> IntSet -union t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2) - | shorter m1 m2 = union1 - | shorter m2 m1 = union2 - | p1 == p2 = Bin p1 m1 (union l1 l2) (union r1 r2) - | otherwise = link p1 t1 p2 t2 - where - union1 | nomatch p2 p1 m1 = link p1 t1 p2 t2 - | zero p2 m1 = Bin p1 m1 (union l1 t2) r1 - | otherwise = Bin p1 m1 l1 (union r1 t2) - - union2 | nomatch p1 p2 m2 = link p1 t1 p2 t2 - | zero p1 m2 = Bin p2 m2 (union t1 l2) r2 - | otherwise = Bin p2 m2 l2 (union t1 r2) - -union t@(Bin _ _ _ _) (Tip kx bm) = insertBM kx bm t -union t@(Bin _ _ _ _) Nil = t +union t1@(Bin p1 l1 r1) t2@(Bin p2 l2 r2) = case treeTreeBranch p1 p2 of + ABL -> Bin p1 (union l1 t2) r1 + ABR -> Bin p1 l1 (union r1 t2) + BAL -> Bin p2 (union t1 l2) r2 + BAR -> Bin p2 l2 (union t1 r2) + EQL -> Bin p1 (union l1 l2) (union r1 r2) + NOM -> link (unPrefix p1) t1 (unPrefix p2) t2 +union t@(Bin _ _ _) (Tip kx bm) = insertBM kx bm t +union t@(Bin _ _ _) Nil = t union (Tip kx bm) t = insertBM kx bm t union Nil t = t @@ -578,27 +600,21 @@ union Nil t = t --------------------------------------------------------------------} -- | \(O(n+m)\). Difference between two sets. difference :: IntSet -> IntSet -> IntSet -difference t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2) - | shorter m1 m2 = difference1 - | shorter m2 m1 = difference2 - | p1 == p2 = bin p1 m1 (difference l1 l2) (difference r1 r2) - | otherwise = t1 - where - difference1 | nomatch p2 p1 m1 = t1 - | zero p2 m1 = bin p1 m1 (difference l1 t2) r1 - | otherwise = bin p1 m1 l1 (difference r1 t2) - - difference2 | nomatch p1 p2 m2 = t1 - | zero p1 m2 = difference t1 l2 - | otherwise = difference t1 r2 +difference t1@(Bin p1 l1 r1) t2@(Bin p2 l2 r2) = case treeTreeBranch p1 p2 of + ABL -> bin p1 (difference l1 t2) r1 + ABR -> bin p1 l1 (difference r1 t2) + BAL -> difference t1 l2 + BAR -> difference t1 r2 + EQL -> bin p1 (difference l1 l2) (difference r1 r2) + NOM -> t1 -difference t@(Bin _ _ _ _) (Tip kx bm) = deleteBM kx bm t -difference t@(Bin _ _ _ _) Nil = t +difference t@(Bin _ _ _) (Tip kx bm) = deleteBM kx bm t +difference t@(Bin _ _ _) Nil = t difference t1@(Tip kx bm) t2 = differenceTip t2 - where differenceTip (Bin p2 m2 l2 r2) | nomatch kx p2 m2 = t1 - | zero kx m2 = differenceTip l2 - | otherwise = differenceTip r2 + where differenceTip (Bin p2 l2 r2) | nomatch kx p2 = t1 + | left kx p2 = differenceTip l2 + | otherwise = differenceTip r2 differenceTip (Tip kx2 bm2) | kx == kx2 = tip kx (bm .&. complement bm2) | otherwise = t1 differenceTip Nil = t1 @@ -612,34 +628,28 @@ difference Nil _ = Nil --------------------------------------------------------------------} -- | \(O(n+m)\). The intersection of two sets. intersection :: IntSet -> IntSet -> IntSet -intersection t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2) - | shorter m1 m2 = intersection1 - | shorter m2 m1 = intersection2 - | p1 == p2 = bin p1 m1 (intersection l1 l2) (intersection r1 r2) - | otherwise = Nil - where - intersection1 | nomatch p2 p1 m1 = Nil - | zero p2 m1 = intersection l1 t2 - | otherwise = intersection r1 t2 - - intersection2 | nomatch p1 p2 m2 = Nil - | zero p1 m2 = intersection t1 l2 - | otherwise = intersection t1 r2 - -intersection t1@(Bin _ _ _ _) (Tip kx2 bm2) = intersectBM t1 - where intersectBM (Bin p1 m1 l1 r1) | nomatch kx2 p1 m1 = Nil - | zero kx2 m1 = intersectBM l1 - | otherwise = intersectBM r1 +intersection t1@(Bin p1 l1 r1) t2@(Bin p2 l2 r2) = case treeTreeBranch p1 p2 of + ABL -> intersection l1 t2 + ABR -> intersection r1 t2 + BAL -> intersection t1 l2 + BAR -> intersection t1 r2 + EQL -> bin p1 (intersection l1 l2) (intersection r1 r2) + NOM -> Nil + +intersection t1@(Bin _ _ _) (Tip kx2 bm2) = intersectBM t1 + where intersectBM (Bin p1 l1 r1) | nomatch kx2 p1 = Nil + | left kx2 p1 = intersectBM l1 + | otherwise = intersectBM r1 intersectBM (Tip kx1 bm1) | kx1 == kx2 = tip kx1 (bm1 .&. bm2) | otherwise = Nil intersectBM Nil = Nil -intersection (Bin _ _ _ _) Nil = Nil +intersection (Bin _ _ _) Nil = Nil intersection (Tip kx1 bm1) t2 = intersectBM t2 - where intersectBM (Bin p2 m2 l2 r2) | nomatch kx1 p2 m2 = Nil - | zero kx1 m2 = intersectBM l2 - | otherwise = intersectBM r2 + where intersectBM (Bin p2 l2 r2) | nomatch kx1 p2 = Nil + | left kx1 p2 = intersectBM l2 + | otherwise = intersectBM r2 intersectBM (Tip kx2 bm2) | kx1 == kx2 = tip kx1 (bm1 .&. bm2) | otherwise = Nil intersectBM Nil = Nil @@ -657,33 +667,30 @@ isProperSubsetOf t1 t2 _ -> False subsetCmp :: IntSet -> IntSet -> Ordering -subsetCmp t1@(Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2) - | shorter m1 m2 = GT - | shorter m2 m1 = case subsetCmpLt of - GT -> GT - _ -> LT - | p1 == p2 = subsetCmpEq - | otherwise = GT -- disjoint +subsetCmp t1@(Bin p1 l1 r1) (Bin p2 l2 r2) = case treeTreeBranch p1 p2 of + ABL -> GT + ABR -> GT + BAL -> case subsetCmp t1 l2 of GT -> GT ; _ -> LT + BAR -> case subsetCmp t1 r2 of GT -> GT ; _ -> LT + EQL -> subsetCmpEq + NOM -> GT -- disjoint where - subsetCmpLt | nomatch p1 p2 m2 = GT - | zero p1 m2 = subsetCmp t1 l2 - | otherwise = subsetCmp t1 r2 subsetCmpEq = case (subsetCmp l1 l2, subsetCmp r1 r2) of (GT,_ ) -> GT (_ ,GT) -> GT (EQ,EQ) -> EQ _ -> LT -subsetCmp (Bin _ _ _ _) _ = GT +subsetCmp (Bin _ _ _) _ = GT subsetCmp (Tip kx1 bm1) (Tip kx2 bm2) | kx1 /= kx2 = GT -- disjoint | bm1 == bm2 = EQ | bm1 .&. complement bm2 == 0 = LT | otherwise = GT -subsetCmp t1@(Tip kx _) (Bin p m l r) - | nomatch kx p m = GT - | zero kx m = case subsetCmp t1 l of GT -> GT ; _ -> LT - | otherwise = case subsetCmp t1 r of GT -> GT ; _ -> LT +subsetCmp t1@(Tip kx _) (Bin p l r) + | nomatch kx p = GT + | left kx p = case subsetCmp t1 l of GT -> GT ; _ -> LT + | otherwise = case subsetCmp t1 r of GT -> GT ; _ -> LT subsetCmp (Tip _ _) Nil = GT -- disjoint subsetCmp Nil Nil = EQ subsetCmp Nil _ = LT @@ -692,17 +699,19 @@ subsetCmp Nil _ = LT -- @(s1 \`isSubsetOf\` s2)@ tells whether @s1@ is a subset of @s2@. isSubsetOf :: IntSet -> IntSet -> Bool -isSubsetOf t1@(Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2) - | shorter m1 m2 = False - | shorter m2 m1 = match p1 p2 m2 && (if zero p1 m2 then isSubsetOf t1 l2 - else isSubsetOf t1 r2) - | otherwise = (p1==p2) && isSubsetOf l1 l2 && isSubsetOf r1 r2 -isSubsetOf (Bin _ _ _ _) _ = False +isSubsetOf t1@(Bin p1 l1 r1) (Bin p2 l2 r2) = case treeTreeBranch p1 p2 of + ABL -> False + ABR -> False + BAL -> isSubsetOf t1 l2 + BAR -> isSubsetOf t1 r2 + EQL -> isSubsetOf l1 l2 && isSubsetOf r1 r2 + NOM -> False +isSubsetOf (Bin _ _ _) _ = False isSubsetOf (Tip kx1 bm1) (Tip kx2 bm2) = kx1 == kx2 && bm1 .&. complement bm2 == 0 -isSubsetOf t1@(Tip kx _) (Bin p m l r) - | nomatch kx p m = False - | zero kx m = isSubsetOf t1 l - | otherwise = isSubsetOf t1 r +isSubsetOf t1@(Tip kx _) (Bin p l r) + | nomatch kx p = False + | left kx p = isSubsetOf t1 l + | otherwise = isSubsetOf t1 r isSubsetOf (Tip _ _) Nil = False isSubsetOf Nil _ = True @@ -720,34 +729,28 @@ isSubsetOf Nil _ = True -- -- @since 0.5.11 disjoint :: IntSet -> IntSet -> Bool -disjoint t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2) - | shorter m1 m2 = disjoint1 - | shorter m2 m1 = disjoint2 - | p1 == p2 = disjoint l1 l2 && disjoint r1 r2 - | otherwise = True - where - disjoint1 | nomatch p2 p1 m1 = True - | zero p2 m1 = disjoint l1 t2 - | otherwise = disjoint r1 t2 - - disjoint2 | nomatch p1 p2 m2 = True - | zero p1 m2 = disjoint t1 l2 - | otherwise = disjoint t1 r2 - -disjoint t1@(Bin _ _ _ _) (Tip kx2 bm2) = disjointBM t1 - where disjointBM (Bin p1 m1 l1 r1) | nomatch kx2 p1 m1 = True - | zero kx2 m1 = disjointBM l1 - | otherwise = disjointBM r1 +disjoint t1@(Bin p1 l1 r1) t2@(Bin p2 l2 r2) = case treeTreeBranch p1 p2 of + ABL -> disjoint l1 t2 + ABR -> disjoint r1 t2 + BAL -> disjoint t1 l2 + BAR -> disjoint t1 r2 + EQL -> disjoint l1 l2 && disjoint r1 r2 + NOM -> True + +disjoint t1@(Bin _ _ _) (Tip kx2 bm2) = disjointBM t1 + where disjointBM (Bin p1 l1 r1) | nomatch kx2 p1 = True + | left kx2 p1 = disjointBM l1 + | otherwise = disjointBM r1 disjointBM (Tip kx1 bm1) | kx1 == kx2 = (bm1 .&. bm2) == 0 | otherwise = True disjointBM Nil = True -disjoint (Bin _ _ _ _) Nil = True +disjoint (Bin _ _ _) Nil = True disjoint (Tip kx1 bm1) t2 = disjointBM t2 - where disjointBM (Bin p2 m2 l2 r2) | nomatch kx1 p2 m2 = True - | zero kx1 m2 = disjointBM l2 - | otherwise = disjointBM r2 + where disjointBM (Bin p2 l2 r2) | nomatch kx1 p2 = True + | left kx1 p2 = disjointBM l2 + | otherwise = disjointBM r2 disjointBM (Tip kx2 bm2) | kx1 == kx2 = (bm1 .&. bm2) == 0 | otherwise = True disjointBM Nil = True @@ -762,8 +765,8 @@ disjoint Nil _ = True filter :: (Key -> Bool) -> IntSet -> IntSet filter predicate t = case t of - Bin p m l r - -> bin p m (filter predicate l) (filter predicate r) + Bin p l r + -> bin p (filter predicate l) (filter predicate r) Tip kx bm -> tip kx (foldl'Bits 0 (bitPred kx) 0 bm) Nil -> Nil @@ -777,10 +780,10 @@ partition predicate0 t0 = toPair $ go predicate0 t0 where go predicate t = case t of - Bin p m l r + Bin p l r -> let (l1 :*: l2) = go predicate l (r1 :*: r2) = go predicate r - in bin p m l1 r1 :*: bin p m l2 r2 + in bin p l1 r1 :*: bin p l2 r2 Tip kx bm -> let bm1 = foldl'Bits 0 (bitPred kx) 0 bm in tip kx bm1 :*: tip kx (bm `xor` bm1) @@ -802,16 +805,16 @@ partition predicate0 t0 = toPair $ go predicate0 t0 takeWhileAntitone :: (Key -> Bool) -> IntSet -> IntSet takeWhileAntitone predicate t = case t of - Bin p m l r - | m < 0 -> + Bin p l r + | signBranch p -> if predicate 0 -- handle negative numbers. - then bin p m (go predicate l) r + then bin p (go predicate l) r else go predicate r _ -> go predicate t where - go predicate' (Bin p m l r) - | predicate' $! p+m = bin p m l (go predicate' r) - | otherwise = go predicate' l + go predicate' (Bin p l r) + | predicate' (unPrefix p) = bin p l (go predicate' r) + | otherwise = go predicate' l go predicate' (Tip kx bm) = tip kx (takeWhileAntitoneBits kx predicate' bm) go _ Nil = Nil @@ -828,16 +831,16 @@ takeWhileAntitone predicate t = dropWhileAntitone :: (Key -> Bool) -> IntSet -> IntSet dropWhileAntitone predicate t = case t of - Bin p m l r - | m < 0 -> + Bin p l r + | signBranch p -> if predicate 0 -- handle negative numbers. then go predicate l - else bin p m l (go predicate r) + else bin p l (go predicate r) _ -> go predicate t where - go predicate' (Bin p m l r) - | predicate' $! p+m = go predicate' r - | otherwise = bin p m (go predicate' l) r + go predicate' (Bin p l r) + | predicate' (unPrefix p) = go predicate' r + | otherwise = bin p (go predicate' l) r go predicate' (Tip kx bm) = tip kx (bm `xor` takeWhileAntitoneBits kx predicate' bm) go _ Nil = Nil @@ -856,25 +859,25 @@ dropWhileAntitone predicate t = spanAntitone :: (Key -> Bool) -> IntSet -> (IntSet, IntSet) spanAntitone predicate t = case t of - Bin p m l r - | m < 0 -> + Bin p l r + | signBranch p -> if predicate 0 -- handle negative numbers. then case go predicate l of (lt :*: gt) -> - let !lt' = bin p m lt r + let !lt' = bin p lt r in (lt', gt) else case go predicate r of (lt :*: gt) -> - let !gt' = bin p m l gt + let !gt' = bin p l gt in (lt, gt') _ -> case go predicate t of (lt :*: gt) -> (lt, gt) where - go predicate' (Bin p m l r) - | predicate' $! p+m = case go predicate' r of (lt :*: gt) -> bin p m l lt :*: gt - | otherwise = case go predicate' l of (lt :*: gt) -> lt :*: bin p m gt r + go predicate' (Bin p l r) + | predicate' (unPrefix p) = case go predicate' r of (lt :*: gt) -> bin p l lt :*: gt + | otherwise = case go predicate' l of (lt :*: gt) -> lt :*: bin p gt r go predicate' (Tip kx bm) = let bm' = takeWhileAntitoneBits kx predicate' bm in (tip kx bm' :*: tip kx (bm `xor` bm')) go _ Nil = (Nil :*: Nil) @@ -887,26 +890,26 @@ spanAntitone predicate t = split :: Key -> IntSet -> (IntSet,IntSet) split x t = case t of - Bin p m l r - | m < 0 -> + Bin p l r + | signBranch p -> if x >= 0 -- handle negative numbers. then case go x l of (lt :*: gt) -> - let !lt' = bin p m lt r + let !lt' = bin p lt r in (lt', gt) else case go x r of (lt :*: gt) -> - let !gt' = bin p m l gt + let !gt' = bin p l gt in (lt, gt') _ -> case go x t of (lt :*: gt) -> (lt, gt) where - go !x' t'@(Bin p m l r) - | nomatch x' p m = if x' < p then (Nil :*: t') else (t' :*: Nil) - | zero x' m = case go x' l of (lt :*: gt) -> lt :*: bin p m gt r - | otherwise = case go x' r of (lt :*: gt) -> bin p m l lt :*: gt + go !x' t'@(Bin p l r) + | nomatch x' p = if x' < unPrefix p then (Nil :*: t') else (t' :*: Nil) + | left x' p = case go x' l of (lt :*: gt) -> lt :*: bin p gt r + | otherwise = case go x' r of (lt :*: gt) -> bin p l lt :*: gt go x' t'@(Tip kx' bm) | kx' > x' = (Nil :*: t') -- equivalent to kx' > prefixOf x' @@ -921,32 +924,32 @@ split x t = splitMember :: Key -> IntSet -> (IntSet,Bool,IntSet) splitMember x t = case t of - Bin p m l r - | m < 0 -> + Bin p l r + | signBranch p -> if x >= 0 -- handle negative numbers. then case go x l of (lt, fnd, gt) -> - let !lt' = bin p m lt r + let !lt' = bin p lt r in (lt', fnd, gt) else case go x r of (lt, fnd, gt) -> - let !gt' = bin p m l gt + let !gt' = bin p l gt in (lt, fnd, gt') _ -> go x t where - go !x' t'@(Bin p m l r) - | nomatch x' p m = if x' < p then (Nil, False, t') else (t', False, Nil) - | zero x' m = + go !x' t'@(Bin p l r) + | nomatch x' p = if x' < unPrefix p then (Nil, False, t') else (t', False, Nil) + | left x' p = case go x' l of (lt, fnd, gt) -> - let !gt' = bin p m gt r + let !gt' = bin p gt r in (lt, fnd, gt') | otherwise = case go x' r of (lt, fnd, gt) -> - let !lt' = bin p m l lt + let !lt' = bin p l lt in (lt', fnd, gt) go x' t'@(Tip kx' bm) | kx' > x' = (Nil, False, t') @@ -970,10 +973,10 @@ splitMember x t = maxView :: IntSet -> Maybe (Key, IntSet) maxView t = case t of Nil -> Nothing - Bin p m l r | m < 0 -> case go l of (result, l') -> Just (result, bin p m l' r) + Bin p l r | signBranch p -> case go l of (result, l') -> Just (result, bin p l' r) _ -> Just (go t) where - go (Bin p m l r) = case go r of (result, r') -> (result, bin p m l r') + go (Bin p l r) = case go r of (result, r') -> (result, bin p l r') go (Tip kx bm) = case highestBitSet bm of bi -> (kx + bi, tip kx (bm .&. complement (bitmapOfSuffix bi))) go Nil = error "maxView Nil" @@ -982,10 +985,10 @@ maxView t = minView :: IntSet -> Maybe (Key, IntSet) minView t = case t of Nil -> Nothing - Bin p m l r | m < 0 -> case go r of (result, r') -> Just (result, bin p m l r') + Bin p l r | signBranch p -> case go r of (result, r') -> Just (result, bin p l r') _ -> Just (go t) where - go (Bin p m l r) = case go l of (result, l') -> (result, bin p m l' r) + go (Bin p l r) = case go l of (result, l') -> (result, bin p l' r) go (Tip kx bm) = case lowestBitSet bm of bi -> (kx + bi, tip kx (bm .&. complement (bitmapOfSuffix bi))) go Nil = error "minView Nil" @@ -1006,23 +1009,23 @@ deleteFindMax = fromMaybe (error "deleteFindMax: empty set has no maximal elemen findMin :: IntSet -> Key findMin Nil = error "findMin: empty set has no minimal element" findMin (Tip kx bm) = kx + lowestBitSet bm -findMin (Bin _ m l r) - | m < 0 = find r +findMin (Bin p l r) + | signBranch p = find r | otherwise = find l where find (Tip kx bm) = kx + lowestBitSet bm - find (Bin _ _ l' _) = find l' - find Nil = error "findMin Nil" + find (Bin _ l' _) = find l' + find Nil = error "findMin Nil" -- | \(O(\min(n,W))\). The maximal element of a set. findMax :: IntSet -> Key findMax Nil = error "findMax: empty set has no maximal element" findMax (Tip kx bm) = kx + highestBitSet bm -findMax (Bin _ m l r) - | m < 0 = find l +findMax (Bin p l r) + | signBranch p = find l | otherwise = find r where find (Tip kx bm) = kx + highestBitSet bm - find (Bin _ _ _ r') = find r' - find Nil = error "findMax Nil" + find (Bin _ _ r') = find r' + find Nil = error "findMax Nil" -- | \(O(\min(n,W))\). Delete the minimal element. Returns an empty set if the set is empty. @@ -1089,13 +1092,13 @@ fold = foldr -- > toAscList set = foldr (:) [] set foldr :: (Key -> b -> b) -> b -> IntSet -> b foldr f z = \t -> -- Use lambda t to be inlinable with two arguments only. - case t of Bin _ m l r | m < 0 -> go (go z l) r -- put negative numbers before - | otherwise -> go (go z r) l + case t of Bin p l r | signBranch p -> go (go z l) r -- put negative numbers before + | otherwise -> go (go z r) l _ -> go z t where - go z' Nil = z' - go z' (Tip kx bm) = foldrBits kx f z' bm - go z' (Bin _ _ l r) = go (go z' r) l + go z' Nil = z' + go z' (Tip kx bm) = foldrBits kx f z' bm + go z' (Bin _ l r) = go (go z' r) l {-# INLINE foldr #-} -- | \(O(n)\). A strict version of 'foldr'. Each application of the operator is @@ -1103,13 +1106,13 @@ foldr f z = \t -> -- Use lambda t to be inlinable with two arguments only. -- function is strict in the starting value. foldr' :: (Key -> b -> b) -> b -> IntSet -> b foldr' f z = \t -> -- Use lambda t to be inlinable with two arguments only. - case t of Bin _ m l r | m < 0 -> go (go z l) r -- put negative numbers before - | otherwise -> go (go z r) l + case t of Bin p l r | signBranch p -> go (go z l) r -- put negative numbers before + | otherwise -> go (go z r) l _ -> go z t where - go !z' Nil = z' - go z' (Tip kx bm) = foldr'Bits kx f z' bm - go z' (Bin _ _ l r) = go (go z' r) l + go !z' Nil = z' + go z' (Tip kx bm) = foldr'Bits kx f z' bm + go z' (Bin _ l r) = go (go z' r) l {-# INLINE foldr' #-} -- | \(O(n)\). Fold the elements in the set using the given left-associative @@ -1120,13 +1123,13 @@ foldr' f z = \t -> -- Use lambda t to be inlinable with two arguments only. -- > toDescList set = foldl (flip (:)) [] set foldl :: (a -> Key -> a) -> a -> IntSet -> a foldl f z = \t -> -- Use lambda t to be inlinable with two arguments only. - case t of Bin _ m l r | m < 0 -> go (go z r) l -- put negative numbers before - | otherwise -> go (go z l) r + case t of Bin p l r | signBranch p -> go (go z r) l -- put negative numbers before + | otherwise -> go (go z l) r _ -> go z t where - go z' Nil = z' - go z' (Tip kx bm) = foldlBits kx f z' bm - go z' (Bin _ _ l r) = go (go z' l) r + go z' Nil = z' + go z' (Tip kx bm) = foldlBits kx f z' bm + go z' (Bin _ l r) = go (go z' l) r {-# INLINE foldl #-} -- | \(O(n)\). A strict version of 'foldl'. Each application of the operator is @@ -1134,13 +1137,13 @@ foldl f z = \t -> -- Use lambda t to be inlinable with two arguments only. -- function is strict in the starting value. foldl' :: (a -> Key -> a) -> a -> IntSet -> a foldl' f z = \t -> -- Use lambda t to be inlinable with two arguments only. - case t of Bin _ m l r | m < 0 -> go (go z r) l -- put negative numbers before - | otherwise -> go (go z l) r + case t of Bin p l r | signBranch p -> go (go z r) l -- put negative numbers before + | otherwise -> go (go z l) r _ -> go z t where - go !z' Nil = z' - go z' (Tip kx bm) = foldl'Bits kx f z' bm - go z' (Bin _ _ l r) = go (go z' l) r + go !z' Nil = z' + go z' (Tip kx bm) = foldl'Bits kx f z' bm + go z' (Bin _ l r) = go (go z' l) r {-# INLINE foldl' #-} {-------------------------------------------------------------------- @@ -1227,46 +1230,45 @@ fromRange (lx,rx) | lp == rp = Tip lp (bitmapOf rx `shiftLL` 1 - bitmapOf lx) | otherwise = let m = branchMask lx rx - p = mask lx m - in if m < 0 -- handle negative numbers - then Bin 0 m (goR 0) (goL 0) - else Bin p m (goL (p .|. m)) (goR (p .|. m)) + p = Prefix (mask lx m .|. m) + in if signBranch p -- handle negative numbers + then Bin p (goR 0) (goL 0) + else Bin p (goL (unPrefix p)) (goR (unPrefix p)) where lp = prefixOf lx rp = prefixOf rx -- goL p0 = fromList [lx .. p0-1] -- Expected: p0 is lx where one 0-bit is flipped to 1 and all bits lower than that are 0. -- p0 can be 0 (pretend that bit WORD_SIZE is flipped to 1). - goL :: Prefix -> IntSet + goL :: Int -> IntSet goL !p0 = go (Tip lp (- bitmapOf lx)) (lp + lbm prefixBitMask) where go !l p | p == p0 = l go l p = let m = lbm p - p' = p `xor` m - l' = Bin p' m l (goFull p (shr1 m)) + l' = Bin (Prefix p) l (goFull p (shr1 m)) in go l' (p + m) -- goR p0 = fromList [p0 .. rx] -- Expected: p0 is a prefix of rx - goR :: Prefix -> IntSet + goR :: Int -> IntSet goR !p0 = go (Tip rp (bitmapOf rx `shiftLL` 1 - 1)) rp where go !r p | p == p0 = r go r p = let m = lbm p p' = p `xor` m - r' = Bin p' m (goFull p' (shr1 m)) r + r' = Bin (Prefix p) (goFull p' (shr1 m)) r in go r' p' -- goFull p m = fromList [p .. p+2*m-1] -- Expected: popCount m == 1, p == mask p m - goFull :: Prefix -> Mask -> IntSet + goFull :: Int -> Int -> IntSet goFull p m | m < suffixBitMask = Tip p (complement 0) - | otherwise = Bin p m (goFull p (shr1 m)) (goFull (p .|. m) (shr1 m)) - lbm :: Prefix -> Prefix + | otherwise = Bin (Prefix (p .|. m)) (goFull p (shr1 m)) (goFull (p .|. m) (shr1 m)) + lbm :: Int -> Int lbm p = intFromNat (lowestBitMask (natFromInt p)) {-# INLINE lbm #-} - shr1 :: Mask -> Mask + shr1 :: Int -> Int shr1 m = intFromNat (natFromInt m `shiftRL` 1) {-# INLINE shr1 #-} @@ -1302,7 +1304,7 @@ fromMonoList (kx : zs1) = addAll' (prefixOf kx) (bitmapOf kx) zs1 | py <- prefixOf ky , m <- branchMask px py , Inserted ty zs' <- addMany' m py (bitmapOf ky) zs - = addAll px (linkWithMask m py ty {-px-} (Tip px bm)) zs' + = addAll px (linkWithMask m py ty px (Tip px bm)) zs' -- for `addAll` and `addMany`, px is /a/ prefix inside the tree `tx` -- `addAll` consumes the rest of the list, adding to the tree `tx` @@ -1312,7 +1314,7 @@ fromMonoList (kx : zs1) = addAll' (prefixOf kx) (bitmapOf kx) zs1 | py <- prefixOf ky , m <- branchMask px py , Inserted ty zs' <- addMany' m py (bitmapOf ky) zs - = addAll px (linkWithMask m py ty {-px-} tx) zs' + = addAll px (linkWithMask m py ty px tx) zs' -- `addMany'` is similar to `addAll'`, but proceeds with `addMany'`. addMany' !_m !px !bm [] @@ -1326,7 +1328,7 @@ fromMonoList (kx : zs1) = addAll' (prefixOf kx) (bitmapOf kx) zs1 | py <- prefixOf ky , mxy <- branchMask px py , Inserted ty zs' <- addMany' mxy py (bitmapOf ky) zs - = addMany m px (linkWithMask mxy py ty {-px-} (Tip px bm)) zs' + = addMany m px (linkWithMask mxy py ty px (Tip px bm)) zs' -- `addAll` adds to `tx` all keys whose prefix w.r.t. `m` agrees with `px`. addMany !_m !_px tx [] @@ -1337,7 +1339,7 @@ fromMonoList (kx : zs1) = addAll' (prefixOf kx) (bitmapOf kx) zs1 | py <- prefixOf ky , mxy <- branchMask px py , Inserted ty zs' <- addMany' mxy py (bitmapOf ky) zs - = addMany m px (linkWithMask mxy py ty {-px-} tx) zs' + = addMany m px (linkWithMask mxy py ty px tx) zs' {-# INLINE fromMonoList #-} data Inserted = Inserted !IntSet ![Key] @@ -1350,16 +1352,16 @@ instance Eq IntSet where t1 /= t2 = nequal t1 t2 equal :: IntSet -> IntSet -> Bool -equal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2) - = (m1 == m2) && (p1 == p2) && (equal l1 l2) && (equal r1 r2) +equal (Bin p1 l1 r1) (Bin p2 l2 r2) + = (p1 == p2) && (equal l1 l2) && (equal r1 r2) equal (Tip kx1 bm1) (Tip kx2 bm2) = kx1 == kx2 && bm1 == bm2 equal Nil Nil = True equal _ _ = False nequal :: IntSet -> IntSet -> Bool -nequal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2) - = (m1 /= m2) || (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2) +nequal (Bin p1 l1 r1) (Bin p2 l2 r2) + = (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2) nequal (Tip kx1 bm1) (Tip kx2 bm2) = kx1 /= kx2 || bm1 /= bm2 nequal Nil Nil = False @@ -1430,10 +1432,10 @@ showTreeWith hang wide t showsTree :: Bool -> [String] -> [String] -> IntSet -> ShowS showsTree wide lbars rbars t = case t of - Bin p m l r + Bin p l r -> showsTree wide (withBar rbars) (withEmpty rbars) r . showWide wide rbars . - showsBars lbars . showString (showBin p m) . showString "\n" . + showsBars lbars . showString (showBin p) . showString "\n" . showWide wide lbars . showsTree wide (withEmpty lbars) (withBar lbars) l Tip kx bm @@ -1444,8 +1446,8 @@ showsTree wide lbars rbars t showsTreeHang :: Bool -> [String] -> IntSet -> ShowS showsTreeHang wide bars t = case t of - Bin p m l r - -> showsBars bars . showString (showBin p m) . showString "\n" . + Bin p l r + -> showsBars bars . showString (showBin p) . showString "\n" . showWide wide bars . showsTreeHang wide (withBar bars) l . showWide wide bars . @@ -1455,8 +1457,8 @@ showsTreeHang wide bars t showsBitMap bm . showString "\n" Nil -> showsBars bars . showString "|\n" -showBin :: Prefix -> Mask -> String -showBin _ _ +showBin :: Prefix -> String +showBin _ = "*" -- ++ show (p,m) showWide :: Bool -> [String] -> String -> String @@ -1488,32 +1490,43 @@ withEmpty bars = " ":bars {-------------------------------------------------------------------- Link --------------------------------------------------------------------} -link :: Prefix -> IntSet -> Prefix -> IntSet -> IntSet -link p1 t1 p2 t2 = linkWithMask (branchMask p1 p2) p1 t1 {-p2-} t2 + +-- | Link two @IntSet@s. The sets must not be empty. The @Prefix@es of the two +-- sets must be different. @k1@ must share the prefix of @t1@. @p2@ must be the +-- prefix of @t2@. +linkKey :: Key -> IntSet -> Prefix -> IntSet -> IntSet +linkKey k1 t1 p2 t2 = link k1 t1 (unPrefix p2) t2 +{-# INLINE linkKey #-} + +-- | Link two @IntSets. The sets must not be empty. The @Prefix@es of the two +-- sets must be different. @k1@ must share the prefix of @t1@ and @k2@ must +-- share the prefix of @t2@. +link :: Int -> IntSet -> Int -> IntSet -> IntSet +link k1 t1 k2 t2 = linkWithMask (branchMask k1 k2) k1 t1 k2 t2 {-# INLINE link #-} -- `linkWithMask` is useful when the `branchMask` has already been computed -linkWithMask :: Mask -> Prefix -> IntSet -> IntSet -> IntSet -linkWithMask m p1 t1 {-p2-} t2 - | zero p1 m = Bin p m t1 t2 - | otherwise = Bin p m t2 t1 +linkWithMask :: Int -> Key -> IntSet -> Key -> IntSet -> IntSet +linkWithMask m k1 t1 k2 t2 + | natFromInt k1 < natFromInt k2 = Bin p t1 t2 + | otherwise = Bin p t2 t1 where - p = mask p1 m + p = Prefix (mask k1 m .|. m) {-# INLINE linkWithMask #-} {-------------------------------------------------------------------- @bin@ assures that we never have empty trees within a tree. --------------------------------------------------------------------} -bin :: Prefix -> Mask -> IntSet -> IntSet -> IntSet -bin _ _ l Nil = l -bin _ _ Nil r = r -bin p m l r = Bin p m l r +bin :: Prefix -> IntSet -> IntSet -> IntSet +bin _ l Nil = l +bin _ Nil r = r +bin p l r = Bin p l r {-# INLINE bin #-} {-------------------------------------------------------------------- @tip@ assures that we never have empty bitmaps within a tree. --------------------------------------------------------------------} -tip :: Prefix -> BitMap -> IntSet +tip :: Int -> BitMap -> IntSet tip _ 0 = Nil tip kx bm = Tip kx bm {-# INLINE tip #-} @@ -1531,7 +1544,7 @@ prefixBitMask :: Int prefixBitMask = complement suffixBitMask {-# INLINE prefixBitMask #-} -prefixOf :: Int -> Prefix +prefixOf :: Int -> Int prefixOf x = x .&. prefixBitMask {-# INLINE prefixOf #-} @@ -1548,49 +1561,6 @@ bitmapOf x = bitmapOfSuffix (suffixOf x) {-# INLINE bitmapOf #-} -{-------------------------------------------------------------------- - Endian independent bit twiddling ---------------------------------------------------------------------} --- Returns True iff the bits set in i and the Mask m are disjoint. -zero :: Int -> Mask -> Bool -zero i m - = (natFromInt i) .&. (natFromInt m) == 0 -{-# INLINE zero #-} - -nomatch,match :: Int -> Prefix -> Mask -> Bool -nomatch i p m - = (mask i m) /= p -{-# INLINE nomatch #-} - -match i p m - = (mask i m) == p -{-# INLINE match #-} - --- Suppose a is largest such that 2^a divides 2*m. --- Then mask i m is i with the low a bits zeroed out. -mask :: Int -> Mask -> Prefix -mask i m - = maskW (natFromInt i) (natFromInt m) -{-# INLINE mask #-} - -{-------------------------------------------------------------------- - Big endian operations ---------------------------------------------------------------------} -maskW :: Nat -> Nat -> Prefix -maskW i m - = intFromNat (i .&. (complement (m-1) `xor` m)) -{-# INLINE maskW #-} - -shorter :: Mask -> Mask -> Bool -shorter m1 m2 - = (natFromInt m1) > (natFromInt m2) -{-# INLINE shorter #-} - -branchMask :: Prefix -> Prefix -> Mask -branchMask p1 p2 - = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2)) -{-# INLINE branchMask #-} - {---------------------------------------------------------------------- To get best performance, we provide fast implementations of lowestBitSet, highestBitSet and fold[lr][l]Bits for GHC. @@ -1820,6 +1790,6 @@ splitRoot :: IntSet -> [IntSet] splitRoot Nil = [] -- NOTE: we don't currently split below Tip, but we could. splitRoot x@(Tip _ _) = [x] -splitRoot (Bin _ m l r) | m < 0 = [r, l] - | otherwise = [l, r] +splitRoot (Bin p l r) | signBranch p = [r, l] + | otherwise = [l, r] {-# INLINE splitRoot #-} diff --git a/containers/src/Data/IntSet/Internal/IntTreeCommons.hs b/containers/src/Data/IntSet/Internal/IntTreeCommons.hs new file mode 100644 index 000000000..8d8ef486f --- /dev/null +++ b/containers/src/Data/IntSet/Internal/IntTreeCommons.hs @@ -0,0 +1,173 @@ +{-# LANGUAGE CPP #-} +#ifdef __GLASGOW_HASKELL__ +{-# LANGUAGE DeriveLift #-} +{-# LANGUAGE StandaloneDeriving #-} +#endif + +-- | +-- = WARNING +-- +-- This module is considered __internal__. +-- +-- The Package Versioning Policy __does not apply__. +-- +-- The contents of this module may change __in any way whatsoever__ +-- and __without any warning__ between minor versions of this package. +-- +-- Authors importing this module are expected to track development +-- closely. +-- +-- = Description +-- +-- This module defines common constructs used by both "Data.IntSet" and +-- "Data.IntMap". +-- +-- @since FIXME +-- + +module Data.IntSet.Internal.IntTreeCommons + ( Key + , Prefix(..) + , nomatch + , left + , signBranch + , TreeTreeBranch(..) + , treeTreeBranch + , mask + , branchMask + ) where + +import Data.Bits (Bits(..)) +import Utils.Containers.Internal.BitUtil (highestBitMask) + +#ifdef __GLASGOW_HASKELL__ +import Language.Haskell.TH.Syntax (Lift) +-- See Note [ Template Haskell Dependencies ] +import Language.Haskell.TH () +#endif + + +type Key = Int + +-- | A @Prefix@ represents some prefix of high-order bits of an @Int@. +-- +-- A @Prefix@ is usually considered in the context of a +-- 'Data.IntSet.Internal.Bin' or 'Data.IntMap.Internal.Bin'. + +-- See Note [IntSet structure and invariants] in Data.IntSet.Internal and +-- Note [IntMap structure and invariants] in Data.IntMap.Internal for details. +newtype Prefix = Prefix { unPrefix :: Int } + deriving Eq + +#ifdef __GLASGOW_HASKELL__ +deriving instance Lift Prefix +#endif + +-- | Whether the @Int@ does not start with the given @Prefix@. +-- +-- An @Int@ starts with a @Prefix@ if it shares the high bits with the internal +-- @Int@ value of the @Prefix@ up to the mask bit. +-- +-- @nomatch@ is usually used to determine whether a key belongs in a @Bin@, +-- since all keys in a @Bin@ share a @Prefix@. +nomatch :: Int -> Prefix -> Bool +nomatch i p = (i `xor` px) .&. prefixMask /= 0 + where + px = unPrefix p + prefixMask = px `xor` (-px) +{-# INLINE nomatch #-} + +-- | Whether the @Int@ is to the left of the split created by a @Bin@ with this +-- @Prefix@. +-- +-- This does not imply that the @Int@ belongs in this @Bin@. That fact is +-- usually determined first using @nomatch@. +left :: Int -> Prefix -> Bool +left i p = i2w i < i2w (unPrefix p) +{-# INLINE left #-} + +-- | A @TreeTreeBranch@ is returned by 'treeTreeBranch' to indicate how two +-- @Bin@s relate to each other. +-- +-- Consider that @A@ and @B@ are the @Bin@s whose @Prefix@es are given to +-- @treeTreeBranch@ as the first and second arguments respectively. +data TreeTreeBranch + = ABL -- ^ A contains B in the left child + | ABR -- ^ A contains B in the right child + | BAL -- ^ B contains A in the left child + | BAR -- ^ B contains A in the right child + | EQL -- ^ A and B have equal prefixes + | NOM -- ^ A and B have prefixes that do not match + +-- | Calculates how two @Bin@s relate to each other by comparing their +-- @Prefix@es. + +-- Notes: +-- * pw .|. (pw-1) sets every bit below the mask bit to 1. This is the greatest +-- key the Bin can have. +-- * pw .&. (pw-1) sets the mask bit and every bit below it to 0. This is the +-- smallest key the Bin can have. +-- +-- First, we compare the prefixes to each other. Then we compare a prefix +-- against the greatest/smallest keys the other prefix's Bin could have. This is +-- enough to determine how the two Bins relate to each other. The conditions can +-- be stated as: +-- +-- * If pw1 from Bin A is less than pw2 from Bin B, and pw2 is <= the greatest +-- key of Bin A, then Bin A contains Bin B in its right child. +-- * ...and so on + +treeTreeBranch :: Prefix -> Prefix -> TreeTreeBranch +treeTreeBranch p1 p2 = case compare pw1 pw2 of + LT | pw2 <= greatest pw1 -> ABR + | smallest pw2 <= pw1 -> BAL + | otherwise -> NOM + GT | pw1 <= greatest pw2 -> BAR + | smallest pw1 <= pw2 -> ABL + | otherwise -> NOM + EQ -> EQL + where + pw1 = i2w (unPrefix p1) + pw2 = i2w (unPrefix p2) + greatest pw = pw .|. (pw-1) + smallest pw = pw .&. (pw-1) +{-# INLINE treeTreeBranch #-} + +-- | Whether this @Prefix@ splits a @Bin@ at the sign bit. +-- +-- This can only be True at the top level. +-- If it is true, the left child contains non-negative keys and the right child +-- contains negative keys. +signBranch :: Prefix -> Bool +signBranch p = unPrefix p == (minBound :: Int) +{-# INLINE signBranch #-} + +-- | The prefix of key @i@ up to (but not including) the switching +-- bit @m@. +mask :: Key -> Int -> Int +mask i m = i .&. ((-m) `xor` m) +{-# INLINE mask #-} + +-- | The first switching bit where the two prefixes disagree. +branchMask :: Int -> Int -> Int +branchMask p1 p2 = w2i (highestBitMask (i2w (p1 `xor` p2))) +{-# INLINE branchMask #-} + +i2w :: Int -> Word +i2w = fromIntegral +{-# INLINE i2w #-} + +w2i :: Word -> Int +w2i = fromIntegral +{-# INLINE w2i #-} + +{-------------------------------------------------------------------- + Notes +--------------------------------------------------------------------} + +-- Note [INLINE bit fiddling] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- It is essential that the bit fiddling functions like nomatch, mask, +-- branchMask etc are inlined. If they do not, the memory allocation skyrockets. +-- The GHC usually gets it right, but it is disastrous if it does not. Therefore +-- we explicitly mark these functions INLINE.