Skip to content

Commit 9a40da0

Browse files
committed
Add NonEmpty variants of inits and tails (#557)
* Add basic benchmarks for inits/tails
* Add NonEmpty variants of inits and tails
  The lazy versions use new implementations:
  - Lazy tails got about 10% faster with ghc-9.2. (A happy accident!)
  - Lazy inits got much faster:
    - For the first few chunks it is about 1.5x faster, due to better list fusion.
    - When there are many chunks it is about 4x faster.
* Formatting and comments, as suggested in review
* Add link to a relevant CLC issue about NonEmpty
  - haskell/core-libraries-committee#107
(cherry picked from commit d4933c6)
1 parent abc756f commit 9a40da0

File tree

6 files changed

+102
-19
lines changed

6 files changed

+102
-19
lines changed

Data/ByteString.hs

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@ module Data.ByteString (
130130
groupBy,
131131
inits,
132132
tails,
133+
initsNE,
134+
tailsNE,
133135
stripPrefix,
134136
stripSuffix,
135137

@@ -235,6 +237,8 @@ import Data.ByteString.Lazy.Internal (fromStrict, toStrict)
235237
import Data.ByteString.Unsafe
236238

237239
import qualified Data.List as List
240+
import qualified Data.List.NonEmpty as NE
241+
import Data.List.NonEmpty (NonEmpty(..))
238242

239243
import Data.Word (Word8)
240244

@@ -427,7 +431,7 @@ last ps@(BS x l)
427431
unsafeWithForeignPtr x $ \p -> peekByteOff p (l-1)
428432
{-# INLINE last #-}
429433

430-
-- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
434+
-- | /O(1)/ Returns all the elements of a 'ByteString' except the last one.
431435
-- An exception will be thrown in the case of an empty ByteString.
432436
--
433437
-- This is a partial function, consider using 'unsnoc' instead.
@@ -1690,17 +1694,47 @@ unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
16901694
-- ---------------------------------------------------------------------
16911695
-- Special lists
16921696

1693-
-- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1697+
-- | /O(n)/ Returns all initial segments of the given 'ByteString', shortest first.
16941698
inits :: ByteString -> [ByteString]
1695-
inits (BS x l) = [BS x n | n <- [0..l]]
1699+
-- see Note [Avoid NonEmpty combinators]
1700+
inits bs = NE.toList $! initsNE bs
16961701

1697-
-- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1702+
-- | /O(n)/ Returns all initial segments of the given 'ByteString', shortest first.
1703+
--
1704+
-- @since 0.11.4.0
1705+
initsNE :: ByteString -> NonEmpty ByteString
1706+
-- see Note [Avoid NonEmpty combinators]
1707+
initsNE (BS x len) = empty :| [BS x n | n <- [1..len]]
1708+
1709+
-- | /O(n)/ Returns all final segments of the given 'ByteString', longest first.
16981710
tails :: ByteString -> [ByteString]
1699-
tails p | null p = [empty]
1700-
| otherwise = p : tails (unsafeTail p)
1711+
-- see Note [Avoid NonEmpty combinators]
1712+
tails bs = NE.toList $! tailsNE bs
1713+
1714+
-- | /O(n)/ Returns all final segments of the given 'ByteString', longest first.
1715+
--
1716+
-- @since 0.11.4.0
1717+
tailsNE :: ByteString -> NonEmpty ByteString
1718+
-- see Note [Avoid NonEmpty combinators]
1719+
tailsNE p | null p = empty :| []
1720+
| otherwise = p :| tails (unsafeTail p)
17011721

17021722
-- less efficient spacewise: tails (BS x l) = [BS (plusForeignPtr x n) (l-n) | n <- [0..l]]
17031723

1724+
{-
1725+
Note [Avoid NonEmpty combinators]
1726+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1727+
1728+
As of base-4.17, most of the NonEmpty API is surprisingly lazy.
1729+
Using it without forcing the arguments yourself is just begging GHC
1730+
to make your code waste time allocating useless selector thunks.
1731+
This may change in the future. See also this CLC issue:
1732+
https://github.com/haskell/core-libraries-committee/issues/107
1733+
But until then, "refactor" with care!
1734+
-}
1735+
1736+
1737+
17041738
-- ---------------------------------------------------------------------
17051739
-- ** Ordered 'ByteString's
17061740

Data/ByteString/Char8.hs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,8 @@ module Data.ByteString.Char8 (
129129
groupBy,
130130
inits,
131131
tails,
132+
initsNE,
133+
tailsNE,
132134
strip,
133135
stripPrefix,
134136
stripSuffix,
@@ -249,7 +251,7 @@ import qualified Data.ByteString.Unsafe as B
249251

250252
-- Listy functions transparently exported
251253
import Data.ByteString (null,length,tail,init,append
252-
,inits,tails,reverse,transpose
254+
,inits,tails,initsNE,tailsNE,reverse,transpose
253255
,concat,take,takeEnd,drop,dropEnd,splitAt
254256
,intercalate,sort,isPrefixOf,isSuffixOf
255257
,isInfixOf,stripPrefix,stripSuffix

Data/ByteString/Lazy.hs

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,8 @@ module Data.ByteString.Lazy (
145145
groupBy,
146146
inits,
147147
tails,
148+
initsNE,
149+
tailsNE,
148150
stripPrefix,
149151
stripSuffix,
150152

@@ -228,6 +230,8 @@ import Prelude hiding
228230
,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem)
229231

230232
import qualified Data.List as List
233+
import qualified Data.List.NonEmpty as NE
234+
import Data.List.NonEmpty (NonEmpty(..))
231235
import qualified Data.Bifunctor as BF
232236
import qualified Data.ByteString as P (ByteString) -- type name only
233237
import qualified Data.ByteString as S -- S for strict (hmm...)
@@ -384,7 +388,7 @@ last (Chunk c0 cs0) = go c0 cs0
384388
go _ (Chunk c cs) = go c cs
385389
-- XXX Don't inline this. Something breaks with 6.8.2 (haven't investigated yet)
386390

387-
-- | /O(n\/c)/ Return all the elements of a 'ByteString' except the last one.
391+
-- | /O(n\/c)/ Returns all the elements of a 'ByteString' except the last one.
388392
--
389393
-- This is a partial function, consider using 'unsnoc' instead.
390394
init :: HasCallStack => ByteString -> ByteString
@@ -1433,19 +1437,39 @@ unzip ls = (pack (List.map fst ls), pack (List.map snd ls))
14331437
-- ---------------------------------------------------------------------
14341438
-- Special lists
14351439

1436-
-- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1440+
-- | Returns all initial segments of the given 'ByteString', shortest first.
14371441
inits :: ByteString -> [ByteString]
1438-
inits = (Empty :) . inits'
1439-
where inits' Empty = []
1440-
inits' (Chunk c cs) = List.map (`Chunk` Empty) (List.drop 1 (S.inits c))
1441-
++ List.map (Chunk c) (inits' cs)
1442+
-- see Note [Avoid NonEmpty combinators] in Data.ByteString
1443+
inits bs = NE.toList $! initsNE bs
14421444

1443-
-- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1445+
-- | Returns all initial segments of the given 'ByteString', shortest first.
1446+
--
1447+
-- @since 0.11.4.0
1448+
initsNE :: ByteString -> NonEmpty ByteString
1449+
-- see Note [Avoid NonEmpty combinators] in Data.ByteString
1450+
initsNE = (Empty :|) . inits' id
1451+
where
1452+
inits' :: (ByteString -> ByteString) -> ByteString -> [ByteString]
1453+
-- inits' f bs === map f (tail (inits bs))
1454+
inits' _ Empty = []
1455+
inits' f (Chunk c@(S.BS x len) cs)
1456+
= [f (S.BS x n `Chunk` Empty) | n <- [1..len]]
1457+
++ inits' (f . Chunk c) cs
1458+
1459+
-- | /O(n)/ Returns all final segments of the given 'ByteString', longest first.
14441460
tails :: ByteString -> [ByteString]
1445-
tails Empty = [Empty]
1446-
tails cs@(Chunk c cs')
1447-
| S.length c == 1 = cs : tails cs'
1448-
| otherwise = cs : tails (Chunk (S.unsafeTail c) cs')
1461+
-- see Note [Avoid NonEmpty combinators] in Data.ByteString
1462+
tails bs = NE.toList $! tailsNE bs
1463+
1464+
-- | /O(n)/ Returns all final segments of the given 'ByteString', longest first.
1465+
--
1466+
-- @since 0.11.4.0
1467+
tailsNE :: ByteString -> NonEmpty ByteString
1468+
-- see Note [Avoid NonEmpty combinators] in Data.ByteString
1469+
tailsNE bs = case uncons bs of
1470+
Nothing -> Empty :| []
1471+
Just (_, tl) -> bs :| tails tl
1472+
14491473

14501474
-- ---------------------------------------------------------------------
14511475
-- Low level constructors

Data/ByteString/Lazy/Char8.hs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ module Data.ByteString.Lazy.Char8 (
123123
groupBy,
124124
inits,
125125
tails,
126+
initsNE,
127+
tailsNE,
126128
stripPrefix,
127129
stripSuffix,
128130

@@ -212,7 +214,7 @@ import Data.ByteString.Lazy
212214
(fromChunks, toChunks
213215
,empty,null,length,tail,init,append,reverse,transpose,cycle
214216
,concat,take,takeEnd,drop,dropEnd,splitAt,intercalate
215-
,isPrefixOf,isSuffixOf,group,inits,tails,copy
217+
,isPrefixOf,isSuffixOf,group,inits,tails,initsNE,tailsNE,copy
216218
,stripPrefix,stripSuffix
217219
,hGetContents, hGet, hPut, getContents
218220
,hGetNonBlocking, hPutNonBlocking

bench/BenchAll.hs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,12 @@ lazyByteStringData :: L.ByteString
9595
lazyByteStringData = case S.splitAt (nRepl `div` 2) byteStringData of
9696
(bs1, bs2) -> L.fromChunks [bs1, bs2]
9797

98+
{-# NOINLINE smallChunksData #-}
99+
smallChunksData :: L.ByteString
100+
smallChunksData
101+
= L.fromChunks [S.take sz (S.drop n byteStringData)
102+
| let sz = 48, n <- [0, sz .. S.length byteStringData]]
103+
98104
{-# NOINLINE byteStringChunksData #-}
99105
byteStringChunksData :: [S.ByteString]
100106
byteStringChunksData = map (S.pack . replicate (4 ) . fromIntegral) intData
@@ -402,6 +408,15 @@ main = do
402408
, bench "balancedSlow" $ partitionLazy (\x -> hashWord8 x < w 128)
403409
]
404410
]
411+
, bgroup "inits"
412+
[ bench "strict" $ nf S.inits byteStringData
413+
, bench "lazy" $ nf L.inits lazyByteStringData
414+
, bench "lazy (small chunks)" $ nf L.inits smallChunksData
415+
]
416+
, bgroup "tails"
417+
[ bench "strict" $ nf S.tails byteStringData
418+
, bench "lazy" $ nf L.tails lazyByteStringData
419+
]
405420
, bgroup "sort" $ map (\s -> bench (S8.unpack s) $ nf S.sort s) sortInputs
406421
, bgroup "words"
407422
[ bench "lorem ipsum" $ nf S8.words loremIpsum

tests/Properties/ByteString.hs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,12 @@ import Text.Read
5656

5757
#endif
5858

59+
import Prelude hiding (head, tail)
5960
import Control.Arrow
6061
import Data.Char
6162
import Data.Foldable
6263
import qualified Data.List as List
64+
import qualified Data.List.NonEmpty as NE
6365
import Data.Semigroup
6466
import Data.String
6567
import Data.Tuple
@@ -183,6 +185,10 @@ tests =
183185
\x -> map B.unpack (B.inits x) === List.inits (B.unpack x)
184186
, testProperty "tails" $
185187
\x -> map B.unpack (B.tails x) === List.tails (B.unpack x)
188+
, testProperty "initsNE" $
189+
\x -> NE.map B.unpack (B.initsNE x) === NE.inits (B.unpack x)
190+
, testProperty "tailsNE" $
191+
\x -> NE.map B.unpack (B.tailsNE x) === NE.tails (B.unpack x)
186192
#endif
187193
, testProperty "all" $
188194
\f x -> B.all f x === all f (B.unpack x)

0 commit comments

Comments
 (0)