Skip to content

Commit 1f3bc28

Browse files
authored
Merge pull request #5 from vgteam/sd-vector-one-iterator
Iterator over set bits in sd_vector
2 parents 963d30b + 9f1d17b commit 1f3bc28

File tree

4 files changed

+364
-34
lines changed

4 files changed

+364
-34
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ As [SDSL 2](https://github.com/simongog/sdsl-lite) is no longer maintained, vgte
88
## Major changes
99

1010
- [x] Switch to C++14.
11-
- [ ] Import `sd_vector` iterator from GBWT.
11+
- [x] `sd_vector` improvements: iterator over set bits and support for predecessor/successor queries.
1212

1313
## Tools/libraries using this fork
1414

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.1.0
1+
2.2.0

include/sdsl/sd_vector.hpp

Lines changed: 237 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
/* sdsl - succinct data structures library
22
Copyright (C) 2012-2014 Simon Gog
33
Copyright (C) 2015 Genome Research Ltd.
4+
Copyright (C) 2021 Jouni Siren
45
56
This program is free software: you can redistribute it and/or modify
67
it under the terms of the GNU General Public License as published by
@@ -32,6 +33,8 @@
3233
namespace sdsl
3334
{
3435

36+
//-----------------------------------------------------------------------------
37+
3538
// forward declaration needed for friend declaration
3639
template<uint8_t t_b = 1,
3740
class t_hi_bit_vector= bit_vector,
@@ -48,7 +51,9 @@ class select_support_sd; // in sd_vector
4851

4952
// forward declaration needed for friend declaration
5053
template<typename, typename, typename>
51-
class sd_vector; // in sd_vector
54+
class sd_vector;
55+
56+
//-----------------------------------------------------------------------------
5257

5358
//! Class for in-place construction of sd_vector from a strictly increasing sequence
5459
/*! \par Building an `sd_vector` will clear the builder.
@@ -123,6 +128,109 @@ class sd_vector_builder
123128
void swap(sd_vector_builder& sdb);
124129
};
125130

131+
//-----------------------------------------------------------------------------
132+
133+
//! An a bidirectional iterator over the set bits in `sd_vector`.
134+
/*!
135+
* \par The `value_type` has semantics `(rank(i), i)` or `(i, select(i + 1))`.
136+
*/
137+
template<class t_hi_bit_vector, class t_select_1, class t_select_0>
138+
class sd_one_iterator
139+
{
140+
public:
141+
typedef sd_vector<t_hi_bit_vector, t_select_1, t_select_0> vector_type;
142+
typedef typename vector_type::size_type size_type;
143+
typedef std::pair<size_type, typename vector_type::value_type> value_type;
144+
typedef typename vector_type::difference_type difference_type;
145+
typedef const value_type* pointer;
146+
typedef const value_type& reference;
147+
typedef std::bidirectional_iterator_tag iterator_category;
148+
149+
sd_one_iterator() : parent(nullptr), low_offset(0), high_offset(0), value(0, 0) {}
150+
151+
//! Primary constructor.
152+
/*!
153+
* \param parent The parent bitvector.
154+
* \param offset Offset in the parent bitvector.
155+
* \param low_offset Offset in `parent->low`; also `rank(offset)`.
156+
* \param high_offset Offset in `parent->high`.
157+
*/
158+
sd_one_iterator(const vector_type* parent, size_type offset, size_type low_offset, size_type high_offset) :
159+
parent(parent), low_offset(low_offset), high_offset(high_offset),
160+
value(low_offset, offset)
161+
{
162+
}
163+
164+
bool operator==(const sd_one_iterator& another) const
165+
{
166+
return (this->low_offset == another.low_offset);
167+
}
168+
169+
bool operator!=(const sd_one_iterator& another) const
170+
{
171+
return (this->low_offset != another.low_offset);
172+
}
173+
174+
//! Returns the current value.
175+
/*!
176+
* \par The value is `(rank(i), i)` or `(i, select(i + 1))`.
177+
*/
178+
reference operator*() const { return this->value; }
179+
180+
//! Returns a pointer to the current value.
181+
/*!
182+
* \par The value is `(rank(i), i)` or `(i, select(i + 1))`.
183+
*/
184+
pointer operator->() const { return &(this->value); }
185+
186+
sd_one_iterator& operator++()
187+
{
188+
this->low_offset++;
189+
if (this->low_offset != this->parent->ones()) {
190+
do { this->high_offset++; }
191+
while (!this->parent->high[this->high_offset]);
192+
this->set_value();
193+
}
194+
return *this;
195+
}
196+
197+
sd_one_iterator& operator++(int)
198+
{
199+
sd_one_iterator result = *this;
200+
++(*this);
201+
return result;
202+
}
203+
204+
sd_one_iterator& operator--()
205+
{
206+
this->low_offset--;
207+
do { this->high_offset--; }
208+
while (!this->parent->high[this->high_offset]);
209+
this->set_value();
210+
return *this;
211+
}
212+
213+
sd_one_iterator& operator--(int)
214+
{
215+
sd_one_iterator result = *this;
216+
--(*this);
217+
return result;
218+
}
219+
220+
private:
221+
const vector_type* parent;
222+
size_type low_offset, high_offset;
223+
value_type value;
224+
225+
void set_value()
226+
{
227+
this->value.first = this->low_offset;
228+
this->value.second = this->parent->combine(this->low_offset, this->high_offset);
229+
}
230+
};
231+
232+
//-----------------------------------------------------------------------------
233+
126234
//! A bit vector which compresses very sparsely populated bit vectors by
127235
// representing the positions of 1 by the Elias-Fano representation for non-decreasing sequences
128236
/*!
@@ -148,6 +256,8 @@ template<class t_hi_bit_vector = bit_vector,
148256
class t_select_0 = typename t_hi_bit_vector::select_0_type>
149257
class sd_vector
150258
{
259+
friend class sd_one_iterator<t_hi_bit_vector, t_select_1, t_select_0>;
260+
151261
public:
152262
typedef bit_vector::size_type size_type;
153263
typedef size_type value_type;
@@ -163,7 +273,10 @@ class sd_vector
163273
typedef select_support_sd<0, t_hi_bit_vector, select_1_support_type, select_0_support_type> select_0_type;
164274
typedef select_support_sd<1, t_hi_bit_vector, select_1_support_type, select_0_support_type> select_1_type;
165275

276+
typedef sd_one_iterator<t_hi_bit_vector, select_1_support_type, select_0_support_type> one_iterator;
277+
166278
typedef t_hi_bit_vector hi_bit_vector_type;
279+
167280
private:
168281
// we need these variables to represent the m ones of the original bit vector of size n
169282
size_type m_size = 0; // length of the original bit vector
@@ -188,6 +301,8 @@ class sd_vector
188301
m_high_0_select.set_vector(&m_high);
189302
}
190303

304+
//-----------------------------------------------------------------------------
305+
191306
public:
192307
const uint8_t& wl = m_wl;
193308
const hi_bit_vector_type& high = m_high;
@@ -304,6 +419,8 @@ class sd_vector
304419
builder = sd_vector_builder();
305420
}
306421

422+
//-----------------------------------------------------------------------------
423+
307424
//! Accessing the i-th element of the original bit_vector
308425
/*! \param i An index i with \f$ 0 \leq i < size() \f$.
309426
* \return The i-th bit of the original bit_vector
@@ -381,6 +498,8 @@ class sd_vector
381498
}
382499
}
383500

501+
//-----------------------------------------------------------------------------
502+
384503
//! Swap method
385504
void swap(sd_vector& v)
386505
{
@@ -395,11 +514,17 @@ class sd_vector
395514
}
396515

397516
//! Returns the size of the original bit vector.
398-
size_type size()const
517+
size_type size() const
399518
{
400519
return m_size;
401520
}
402521

522+
//! Returns the number of set bits in the bitvector.
523+
size_type ones() const
524+
{
525+
return m_low.size();
526+
}
527+
403528
sd_vector& operator=(const sd_vector& v)
404529
{
405530
if (this != &v) {
@@ -449,6 +574,8 @@ class sd_vector
449574
m_high_0_select.load(in, &m_high);
450575
}
451576

577+
//-----------------------------------------------------------------------------
578+
452579
iterator begin() const
453580
{
454581
return iterator(this, 0);
@@ -458,11 +585,116 @@ class sd_vector
458585
{
459586
return iterator(this, size());
460587
}
588+
589+
//! Returns an iterator at the first set bit of the bitvector.
/*!
 * \par If there are no set bits, the iterator is built with all offsets zero.
 */
one_iterator one_begin() const
{
    // Empty case: nothing to scan for.
    if (this->ones() == 0) { return one_iterator(this, 0, 0, 0); }

    // Locate the first one in `high`; it belongs to the set bit of rank 0.
    size_type first_one = 0;
    while (!this->high[first_one]) { ++first_one; }

    return one_iterator(this, this->combine(0, first_one), 0, first_one);
}
599+
600+
//! Returns the past-the-end iterator over the set bits.
/*!
 * \par The iterator is built with offset `size()`, rank `ones()`, and
 * `high` offset `high.size()`.
 */
one_iterator one_end() const
{
    const size_type end_rank = this->ones();
    const size_type end_high = this->high.size();
    return one_iterator(this, this->size(), end_rank, end_high);
}
604+
605+
//! Returns an iterator at the set bit of the specified rank in the bitvector.
606+
/*!
607+
* \param i One-based rank in the bitvector.
608+
* \par The returned iterator will have value `(i - 1, select(i))`.
609+
* Returns `one_end()` if no such bit exists.
610+
*/
611+
one_iterator select_iter(size_type i) const
612+
{
613+
if (i == 0 || i > this->ones()) { return this->one_end(); }
614+
615+
size_type high_offset = this->high_1_select(i);
616+
return one_iterator(this, this->combine(i - 1, high_offset), i - 1, high_offset);
617+
}
618+
619+
//! Returns an iterator to the last set bit at or before the argument.
620+
/*!
621+
* \param i Offset in the bitvector.
622+
* \par Returns `one_end()` if no such bit exists.
623+
*/
624+
one_iterator predecessor(size_type i) const
625+
{
626+
if (this->ones() == 0) { return this->one_end(); }
627+
i = std::min(i, this->size() - 1);
628+
629+
// Find the 0 in `high` that follows all the values with the same `high_part`.
630+
size_type high_part = i >> this->wl;
631+
size_type low_part = i & bits::lo_set[this->wl];
632+
size_type high_offset = this->high_0_select(high_part + 1);
633+
size_type low_offset = high_offset - high_part;
634+
if (low_offset == 0) { return this->one_end(); }
635+
636+
// Iterate backward over the values sharing the same `high_part` until we
637+
// find the predecessor or run out of such values.
638+
high_offset--; low_offset--;
639+
while (this->high[high_offset] && this->low[low_offset] > low_part) {
640+
if (low_offset == 0) { return this->one_end(); }
641+
high_offset--; low_offset--;
642+
}
643+
644+
// The predecessor could also have a lower `high_part`.
645+
while (!this->high[high_offset]) { high_offset--; }
646+
647+
return one_iterator(this, this->combine(low_offset, high_offset), low_offset, high_offset);
648+
}
649+
650+
//! Returns an iterator to the first set bit at or after the argument.
651+
/*!
652+
* \param i Offset in the bitvector.
653+
* \par Returns `one_end()` if no such bit exists.
654+
*/
655+
one_iterator successor(size_type i) const
656+
{
657+
if (i >= this->size()) { return this->one_end(); }
658+
659+
// Find the offset in `high` that follows the 0 preceding the values with the same `high_part`.
660+
size_type high_part = i >> this->wl;
661+
size_type low_part = i & bits::lo_set[this->wl];
662+
size_type high_offset = (high_part == 0 ? 0 : this->high_0_select(high_part) + 1);
663+
size_type low_offset = high_offset - high_part;
664+
665+
// Iterate over the values sharing the same `low_part` until we find the successor
666+
// or run out of such values.
667+
while (high_offset < this->high.size() && this->high[high_offset]) {
668+
if (this->low[low_offset] >= low_part) {
669+
return one_iterator(this, this->combine(low_offset, high_offset), low_offset, high_offset);
670+
}
671+
high_offset++; low_offset++;
672+
}
673+
674+
// The successor has a greater `high_part`, so we continue iterating until we find it.
675+
while (high_offset < this->high.size()) {
676+
if (this->high[high_offset]) {
677+
return one_iterator(this, this->combine(low_offset, high_offset), low_offset, high_offset);
678+
}
679+
high_offset++;
680+
}
681+
682+
return this->one_end();
683+
}
684+
685+
private:
686+
// Combine `low_offset` and `high_offset` into bitvector offset.
687+
size_type combine(size_type low_offset, size_type high_offset) const
688+
{
689+
return this->low[low_offset] + ((high_offset - low_offset) << this->wl);
690+
}
461691
};
462692

463693
//! Specialized constructor that is a bit more space-efficient than the default.
464694
template<> sd_vector<>::sd_vector(sd_vector_builder& builder);
465695

696+
//-----------------------------------------------------------------------------
697+
466698
template<uint8_t t_b>
467699
struct rank_support_sd_trait {
468700
typedef bit_vector::size_type size_type;
@@ -564,6 +796,8 @@ class rank_support_sd
564796
}
565797
};
566798

799+
//-----------------------------------------------------------------------------
800+
567801
template<uint8_t t_b, class t_sd_vec>
568802
struct select_support_sd_trait {
569803
typedef bit_vector::size_type size_type;
@@ -669,7 +903,6 @@ class select_support_sd
669903
}
670904
};
671905

672-
673906
//! Select_0 data structure for sd_vector
674907
/*! \tparam t_sd_vector sd_vector type
675908
* \tparam t_rank_1 Rank support for high part of sd_vector
@@ -853,6 +1086,7 @@ class select_0_support_sd
8531086

8541087
};
8551088

1089+
//-----------------------------------------------------------------------------
8561090

8571091
} // end namespace
8581092
#endif

0 commit comments

Comments
 (0)