haskell · clyring · Feb 4, 2024 · Nov 29, 2023 · Nov 29, 2023 · Nov 29, 2023
diff --git a/Data/ByteString/Lazy.hs b/Data/ByteString/Lazy.hs
@@ -1,8 +1,11 @@
-{-# LANGUAGE BangPatterns #-}
 {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
 {-# OPTIONS_HADDOCK prune #-}
 {-# LANGUAGE Trustworthy #-}
 
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
 -- |
 -- Module      : Data.ByteString.Lazy
 -- Copyright   : (c) Don Stewart 2006
@@ -237,9 +240,9 @@ import qualified Data.ByteString        as P  (ByteString) -- type name only
 import qualified Data.ByteString        as S  -- S for strict (hmm...)
 import qualified Data.ByteString.Internal.Type as S
 import qualified Data.ByteString.Unsafe as S
-import qualified Data.ByteString.Lazy.Internal.Deque as D
 import Data.ByteString.Lazy.Internal
 
+import Control.Exception        (assert)
 import Control.Monad            (mplus)
 import Data.Word                (Word8)
 import Data.Int                 (Int64)
@@ -790,15 +793,73 @@ take i cs0         = take' i cs0
 --
 -- @since 0.11.2.0
 takeEnd :: Int64 -> ByteString -> ByteString
-takeEnd i _ | i <= 0 = Empty
-takeEnd i cs0        = takeEnd' i cs0
-  where takeEnd' 0 _         = Empty
-        takeEnd' n cs        =
-            snd $ foldrChunks takeTuple (n,Empty) cs
-        takeTuple _ (0, cs)  = (0, cs)
-        takeTuple c (n, cs)
-            | n > fromIntegral (S.length c) = (n - fromIntegral (S.length c), Chunk c cs)
-            | otherwise      = (0, Chunk (S.takeEnd (fromIntegral n) c) cs)
+takeEnd i bs
+  | i <= 0 = Empty
+  | otherwise = splitAtEndFold (\_ res -> res) id i bs
+
+-- | Helper function for implementing 'takeEnd' and 'dropEnd'
+splitAtEndFold
+  :: forall result
+  .  (S.StrictByteString -> result -> result)
+  -- ^ What to do when one chunk of output is ready
+  -- (The StrictByteString will not be empty.)
+  -> (ByteString -> result)
+  -- ^ What to do when the split-point is reached
+  -> Int64
+  -- ^ Number of bytes to leave at the end (must be strictly positive)
+  -> ByteString -- ^ Input ByteString
+  -> result
+{-# INLINE splitAtEndFold #-}
+splitAtEndFold step end len bs0 = assert (len > 0) $ case bs0 of
+  Empty -> end Empty
+  Chunk c t -> goR len c t t
+ where
+  -- Idea: Keep two references into the input ByteString:
+  --   "bsL" tracks the current split point,
+  --   "bsR" tracks the yet-unprocessed tail.
+  -- When they are closer than "len" bytes apart, process more input.  ("goR")
+  -- When they are  at  least  "len" bytes apart, produce more output. ("goL")
+  goR :: Int64 -> S.StrictByteString -> ByteString -> ByteString -> result
+  goR !undershoot nextOutput@(S.BS noFp noLen) bsL bsR =
+      assert (undershoot > 0) $
+      -- INVARIANT: length bsL == length bsR + len - undershoot
+      -- (not 'assert'ed because that would break our laziness properties)
+      case bsR of
+    Empty
+      | undershoot >= intToInt64 noLen
+        -> end (Chunk nextOutput bsL)
+      | undershootW <- fromIntegral @Int64 @Int undershoot
+        -- conversion Int64->Int is OK because 0 < undershoot < noLen
+      , splitIndex <- noLen - undershootW
+      , beforeSplit <- S.BS noFp splitIndex
+      , afterSplit <- S.BS (noFp `S.plusForeignPtr` splitIndex) undershootW
+        -> step beforeSplit $ end (Chunk afterSplit bsL)
+
+    Chunk (S.BS _ cLen) newBsR
+      | cLen64 <- intToInt64 cLen
+      , undershoot > cLen64
+        -> goR (undershoot - cLen64) nextOutput bsL newBsR
+      | undershootW <- fromIntegral @Int64 @Int undershoot
+        -> step nextOutput $ goL (cLen - undershootW) bsL newBsR
+
+  goL :: Int -> ByteString -> ByteString -> result
+  goL !overshoot bsL bsR =
+      assert (overshoot >= 0) $
+      -- INVARIANT: length bsL == length bsR + len + intToInt64 overshoot
+      -- (not 'assert'ed because that would break our laziness properties)
+      case bsL of
+    Empty -> splitAtEndFoldInvariantFailed
+    Chunk c@(S.BS _ cLen) newBsL
+      | overshoot >= cLen
+        -> step c $ goL (overshoot - cLen) newBsL bsR
+      | otherwise
+        -> goR (intToInt64 $ cLen - overshoot) c newBsL bsR
+
+splitAtEndFoldInvariantFailed :: a
+-- See Note [Float error calls out of INLINABLE things] in D.B.Internal.Type
+splitAtEndFoldInvariantFailed =
+  moduleError "splitAtEndFold"
+              "internal error: bsL not longer than bsR"
 
 -- | /O(n\/c)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
 -- elements, or 'empty' if @n > 'length' xs@.
@@ -824,44 +885,9 @@ drop i cs0 = drop' i cs0
 --
 -- @since 0.11.2.0
 dropEnd :: Int64 -> ByteString -> ByteString
-dropEnd i p | i <= 0 = p
-dropEnd i p          = go D.empty p
-  where go :: D.Deque -> ByteString -> ByteString
-        go deque (Chunk c cs)
-            | D.byteLength deque < i = go (D.snoc c deque) cs
-            | otherwise              =
-                  let (output, deque') = getOutput empty (D.snoc c deque)
-                    in foldrChunks Chunk (go deque' cs) output
-        go deque Empty               = fromDeque $ dropEndBytes deque i
-
-        len c = fromIntegral (S.length c)
-
-        -- get a `ByteString` from all the front chunks of the accumulating deque
-        -- for which we know they won't be dropped
-        getOutput :: ByteString -> D.Deque -> (ByteString, D.Deque)
-        getOutput out deque = case D.popFront deque of
-            Nothing                       -> (reverseChunks out, deque)
-            Just (x, deque') | D.byteLength deque' >= i ->
-                            getOutput (Chunk x out) deque'
-            _ -> (reverseChunks out, deque)
-
-        -- reverse a `ByteString`s chunks, keeping all internal `S.StrictByteString`s
-        -- unchanged
-        reverseChunks = foldlChunks (flip Chunk) empty
-
-        -- drop n elements from the rear of the accumulating `deque`
-        dropEndBytes :: D.Deque -> Int64 -> D.Deque
-        dropEndBytes deque n = case D.popRear deque of
-            Nothing                       -> deque
-            Just (deque', x) | len x <= n -> dropEndBytes deque' (n - len x)
-                             | otherwise  ->
-                                D.snoc (S.dropEnd (fromIntegral n) x) deque'
-
-        -- build a lazy ByteString from an accumulating `deque`
-        fromDeque :: D.Deque -> ByteString
-        fromDeque deque =
-            List.foldr chunk Empty (D.front deque) `append`
-            List.foldl' (flip chunk) Empty (D.rear deque)
+dropEnd i p
+  | i <= 0 = p
+  | otherwise = splitAtEndFold Chunk (const Empty) i p
 
 -- | /O(n\/c)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
 splitAt :: Int64 -> ByteString -> (ByteString, ByteString)
@@ -1688,6 +1714,9 @@ revNonEmptyChunks = List.foldl' (flip Chunk) Empty
 revChunks :: [P.ByteString] -> ByteString
 revChunks = List.foldl' (flip chunk) Empty
 
+intToInt64 :: Int -> Int64
+intToInt64 = fromIntegral @Int @Int64
+
 -- $IOChunk
 --
 -- ⚠ Using lazy I\/O functions like 'readFile' or 'hGetContents'

diff --git a/Data/ByteString/Lazy/Internal/Deque.hs b/Data/ByteString/Lazy/Internal/Deque.hs
diff --git a/bytestring.cabal b/bytestring.cabal
@@ -106,7 +106,6 @@ library
                      Data.ByteString.Builder.RealFloat.Internal
                      Data.ByteString.Builder.RealFloat.TableGenerator
                      Data.ByteString.Internal.Type
-                     Data.ByteString.Lazy.Internal.Deque
                      Data.ByteString.Lazy.ReadInt
                      Data.ByteString.Lazy.ReadNat
                      Data.ByteString.ReadInt

diff --git a/tests/Properties/ByteString.hs b/tests/Properties/ByteString.hs
@@ -80,6 +80,10 @@ import Test.Tasty
 import Test.Tasty.QuickCheck
 import QuickCheckUtils
 
+#ifdef BYTESTRING_LAZY
+import Data.Int
+#endif
+
 #ifndef BYTESTRING_CHAR8
 toElem :: Word8 -> Word8
 toElem = id
@@ -114,16 +118,25 @@ instance Arbitrary Natural where
 
 testRdInt :: forall a. (Arbitrary a, RdInt a) => String -> TestTree
 testRdInt s = testGroup s $
-    [ testProperty "from string" $ \ prefix value suffix ->
+    [ testProperty "from string" $ int64OK $ \value prefix suffix ->
         let si = show @a value
             b  = prefix <> B.pack si <> suffix
          in fmap (second B.unpack) (bread @a b)
             === sread @a (B.unpack prefix ++ si ++ B.unpack suffix)
-    , testProperty "from number" $ \n ->
+    , testProperty "from number" $ int64OK $ \n ->
         bread @a (B.pack (show n)) === Just (n, B.empty)
     ]
 #endif
 
+intToIndexTy :: Int -> IndexTy
+#ifdef BYTESTRING_LAZY
+type IndexTy = Int64
+intToIndexTy = fromIntegral @Int @Int64
+#else
+type IndexTy = Int
+intToIndexTy = id
+#endif
+
 tests :: [TestTree]
 tests =
   [ testProperty "pack . unpack" $
@@ -308,7 +321,7 @@ tests =
 #endif
 
   , testProperty "drop" $
-    \n x -> B.unpack (B.drop n x) === List.genericDrop n (B.unpack x)
+    \(intToIndexTy -> n) x -> B.unpack (B.drop n x) === List.genericDrop n (B.unpack x)
   , testProperty "drop 10" $
     \x -> let n = 10 in B.unpack (B.drop n x) === List.genericDrop n (B.unpack x)
   , testProperty "drop 2^31" $
@@ -325,7 +338,7 @@ tests =
 #endif
 
   , testProperty "take" $
-    \n x -> B.unpack (B.take n x) === List.genericTake n (B.unpack x)
+    \(intToIndexTy -> n) x -> B.unpack (B.take n x) === List.genericTake n (B.unpack x)
   , testProperty "take 10" $
     \x -> let n = 10 in B.unpack (B.take n x) === List.genericTake n (B.unpack x)
   , testProperty "take 2^31" $
@@ -342,11 +355,11 @@ tests =
 #endif
 
   , testProperty "dropEnd" $
-    \n x -> B.dropEnd n x === B.take (B.length x - n) x
+    \(intToIndexTy -> n) x -> B.dropEnd n x === B.take (B.length x - n) x
   , testProperty "dropWhileEnd" $
     \f x -> B.dropWhileEnd f x === B.reverse (B.dropWhile f (B.reverse x))
   , testProperty "takeEnd" $
-    \n x -> B.takeEnd n x === B.drop (B.length x - n) x
+    \(intToIndexTy -> n) x -> B.takeEnd n x === B.drop (B.length x - n) x
   , testProperty "takeWhileEnd" $
     \f x -> B.takeWhileEnd f x === B.reverse (B.takeWhile f (B.reverse x))
 
@@ -366,7 +379,7 @@ tests =
   , testProperty "compareLength 4" $
     \x (toElem -> c) -> B.compareLength (B.snoc x c <> undefined) (B.length x) === GT
   , testProperty "compareLength 5" $
-    \x n -> B.compareLength x n === compare (B.length x) n
+    \x (intToIndexTy -> n) -> B.compareLength x n === compare (B.length x) n
   , testProperty "dropEnd lazy" $
     \(toElem -> c) -> B.take 1 (B.dropEnd 1 (B.singleton c <> B.singleton c <> B.singleton c <> undefined)) === B.singleton c
   , testProperty "dropWhileEnd lazy" $
@@ -470,7 +483,8 @@ tests =
       (l1 == l2 || l1 == l2 + 1) && sum (map B.length splits) + l2 == B.length x
 
   , testProperty "splitAt" $
-    \n x -> (B.unpack *** B.unpack) (B.splitAt n x) === List.genericSplitAt n (B.unpack x)
+    \(intToIndexTy -> n) x -> (B.unpack *** B.unpack) (B.splitAt n x)
+                          === List.genericSplitAt n (B.unpack x)
   , testProperty "splitAt 10" $
     \x -> let n = 10 in (B.unpack *** B.unpack) (B.splitAt n x) === List.genericSplitAt n (B.unpack x)
   , testProperty "splitAt (2^31)" $
@@ -594,13 +608,13 @@ tests =
 #endif
 
   , testProperty "index" $
-    \(NonNegative n) x -> fromIntegral n < B.length x ==> B.index x (fromIntegral n) === B.unpack x !! n
+    \(NonNegative n) x -> intToIndexTy n < B.length x ==> B.index x (intToIndexTy n) === B.unpack x !! n
   , testProperty "indexMaybe" $
-    \(NonNegative n) x -> fromIntegral n < B.length x ==> B.indexMaybe x (fromIntegral n) === Just (B.unpack x !! n)
+    \(NonNegative n) x -> intToIndexTy n < B.length x ==> B.indexMaybe x (intToIndexTy n) === Just (B.unpack x !! n)
   , testProperty "indexMaybe Nothing" $
-    \n x -> (n :: Int) < 0 || fromIntegral n >= B.length x ==> B.indexMaybe x (fromIntegral n) === Nothing
+    \n x -> n < 0 || intToIndexTy n >= B.length x ==> B.indexMaybe x (intToIndexTy n) === Nothing
   , testProperty "!?" $
-    \n x -> B.indexMaybe x (fromIntegral (n :: Int)) === x B.!? (fromIntegral n)
+    \(intToIndexTy -> n) x -> B.indexMaybe x n === x B.!? n
 
 #ifdef BYTESTRING_CHAR8
   , testProperty "isString" $

diff --git a/tests/QuickCheckUtils.hs b/tests/QuickCheckUtils.hs
@@ -1,12 +1,16 @@
+{-# LANGUAGE DataKinds #-}
 {-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE TypeOperators #-}
+{-# LANGUAGE UndecidableInstances #-}
 
 module QuickCheckUtils
   ( Char8(..)
   , String8(..)
   , CByteString(..)
   , Sqrt(..)
+  , int64OK
   ) where
 
 import Test.Tasty.QuickCheck
@@ -18,6 +22,7 @@ import Data.Word
 import Data.Int
 import System.IO
 import Foreign.C (CChar)
+import GHC.TypeLits (TypeError, ErrorMessage(..))
 
 import qualified Data.ByteString.Short as SB
 import qualified Data.ByteString      as P
@@ -112,3 +117,22 @@ instance Arbitrary SB.ShortByteString where
 
 instance CoArbitrary SB.ShortByteString where
   coarbitrary s = coarbitrary (SB.unpack s)
+
+-- | This /poison instance/ exists to make accidental mis-use
+-- of the @Arbitrary Int64@ instance a bit less likely.
+instance {-# OVERLAPPING #-}
+  TypeError (Text "Found a test taking a raw Int64 argument."
+    :$$: Text "'instance Arbitrary Int64' by default is likely to"
+    :$$: Text "produce very large numbers after the first few tests,"
+    :$$: Text "which doesn't make great indices into a LazyByteString."
+    :$$: Text "For indices, try 'intToIndexTy' in Properties/ByteString.hs."
+    :$$: Text ""
+    :$$: Text "If very few small-numbers tests is OK, use"
+    :$$: Text "'int64OK' to bypass this poison-instance."
+  ) => Testable (Int64 -> prop) where
+  property = error "poison instance Testable (Int64 -> prop)"
+
+-- | Use this to bypass the poison instance for @Testable (Int64 -> prop)@
+-- defined in "QuickCheckUtils".
+int64OK :: (Arbitrary a, Show a, Testable b) => (a -> b) -> Property
+int64OK f = propertyForAllShrinkShow arbitrary shrink (\v -> [show v]) f