@@ -23,12 +23,28 @@ const map = UInt64[
2323
2424const maxU64 = typemax (UInt64)
2525
"""
    struct Minimizer
        value::UInt64
        position::UInt64
    end

A minimizer is the k-mer that, given a hash function mapping k-mers to integers,
is the minimum k-mer within a given set of k-mers.

# Fields
- `value`: the result of applying the hash function to the k-mer.
- `position`: a bit-packed integer that includes the reference ID, locus, and strand.
"""
struct Minimizer
    value::UInt64
    position::UInt64
end
3040
3141# transliteration of the invertible hash function found in minimap
42+ """
43+ hash(x::UInt64, mask::UInt64)
44+
45+ A transliteration of Jenkins' invertible hash function for 64-bit integers.
46+ Bijectively maps any kmer to an integer.
47+ """
3248function hash (x:: UInt64 , mask:: UInt64 ):: UInt64
3349 x = (~ x + (x << 21 )) & mask
3450 x = x ⊻ x >> 24
@@ -40,6 +56,14 @@ function hash(x::UInt64, mask::UInt64)::UInt64
4056 return x
4157end
4258
59+ """
60+ sketch(seq::Array{UInt8}, k::Int, w::Int, id::Int)
61+
62+ Sketch a linear sequence into a vector of minimizers.
63+ `k` sets the kmer size.
64+ `w` sets the number of contiguous kmers that will be used in the window minimizer comparison.
65+ `id` is a unique integer that corresponds to the sequence. It will be bitpacked into the minimizer position.
66+ """
4367function sketch (seq:: Array{UInt8} , k:: Int , w:: Int , id:: Int )
4468 (k < 0 || k > 32 ) && error (" k='$(k) ' must be ∈ [0,32]" )
4569 (w < 0 || w > 255 ) && error (" w='$(w) ' must be ∈ [0,255]" )
147171
"""
    tuples(iter)

Return a lazy iterator over pairs `(x, y)` where `x` is each element of `iter` and `y`
runs over `iter[i:end]`, with `i` being the position of `x` as counted by `enumerate`.
Each element is therefore paired with itself and with every element after it.

`iter` must support both `enumerate` and range indexing (`iter[i:end]`).
"""
function tuples(iter)
    # For one leading element, pair it with itself and everything that follows it.
    pairs_from(idx, head) = ((head, tail) for tail in iter[idx:end])
    return Iterators.flatten(pairs_from(idx, head) for (idx, head) in enumerate(iter))
end
149173
174+ """
175+ distance(graphs...; k=15, w=100)
176+
177+ Compute the pairwise distance between all input graphs.
178+ Distance is the set distance between minimizers.
179+ Linear-time algorithm using hash collisions.
180+ """
150181function distance (graphs... ; k= 15 , w= 100 )
151182 sequences = Dict (seq for graph in graphs for seq in sequence (graph))
152183
0 commit comments