-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSearch.hs
72 lines (60 loc) · 2.47 KB
/
Search.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
{-# LANGUAGE TypeFamilies #-}
module Search
( SearchLimit (..),
Search (..),
searchLimit,
searchMatchFingerprint,
searchMatchSignature,
runSearch,
)
where
import Control.Lens (Lens', (^.))
import Data.Ord (comparing)
import Data.Word (Word8)
import Streamly.Internal.Data.Stream.IsStream (concatPairsWith)
import Streamly.Prelude (IsStream, SerialT)
import qualified Streamly.Prelude as Stream
newtype SearchLimit
= SearchLimit Word8
data Search m a a' b b' c
= Search
SearchLimit
(a -> a -> SerialT m (Int, a'))
(b -> b -> SerialT m b')
(a' -> b' -> (Int, c))
takeLimit :: Monad m => SearchLimit -> SerialT m a -> SerialT m a
takeLimit (SearchLimit a) =
Stream.take (fromIntegral a)
searchLimit :: Lens' (Search m a a' b b' c) SearchLimit
searchLimit f (Search a b c d) =
(\a' -> Search a' b c d) <$> f a
searchMatchFingerprint :: Lens' (Search m a a' b b' c) (a -> a -> SerialT m (Int, a'))
searchMatchFingerprint f (Search a b c d) =
(\b' -> Search a b' c d) <$> f b
searchMatchSignature :: Lens' (Search m a a' b b' c) (b -> b -> SerialT m b')
searchMatchSignature f (Search a b c d) =
(\c' -> Search a b c' d) <$> f c
searchCombineCosts :: Lens' (Search m a a' b b' c) (a' -> b' -> (Int, c))
searchCombineCosts f (Search a b c d) =
Search a b c <$> f d
sortOn' :: (IsStream t, Monad m) => (b -> b -> Ordering) -> t m b -> t m b
sortOn' f =
concatPairsWith (Stream.mergeBy f) Stream.fromPure
applyCosts :: (Monad n, Ord z) => Search m a a' b b' c -> SerialT n (x, (z, y)) -> SerialT n (x, y)
applyCosts search =
fmap (\(x, (_, y)) -> (x, y)) . takeLimit (search ^. searchLimit) . sortOn' (comparing (fst . snd))
-- | Find some candidates using a fast search (e.g. fingerprints) then look
-- | into each candidate using an expensive matching algorithm (e.g.
-- | unification) - both calculate a "cost" for each match which is used to
-- | sort results.
runSearch :: Monad m => Search m a a' b b' c -> [(b, a)] -> a -> b -> SerialT m (b, c)
runSearch search fingerprints fingerprint query =
applyCosts search $ do
(candidate, fingerprintCost) <- applyCosts search bestByFingerprint
signatureCost <- (search ^. searchMatchSignature) query candidate
pure (candidate, (search ^. searchCombineCosts) fingerprintCost signatureCost)
where
bestByFingerprint = do
(b, fingerprint') <- Stream.fromList fingerprints
fingerprintCost <- (search ^. searchMatchFingerprint) fingerprint fingerprint'
pure (b, fingerprintCost)