SDSL 3.0.3
Succinct Data Structure Library
Loading...
Searching...
No Matches
dac_vector.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef SDSL_DAC_VECTOR
9#define SDSL_DAC_VECTOR
10
11#include <algorithm>
12#include <assert.h>
13#include <iosfwd>
14#include <stddef.h>
15#include <stdint.h>
16#include <string>
17
18#include <sdsl/bits.hpp>
19#include <sdsl/cereal.hpp>
20#include <sdsl/int_vector.hpp>
22#include <sdsl/io.hpp>
23#include <sdsl/iterators.hpp>
27#include <sdsl/util.hpp>
28
30namespace sdsl
31{
32
34
58template <uint8_t t_b = 4, typename t_rank = rank_support_v5<>>
60{
61private:
62 static_assert(t_b > 0, "dac_vector: t_b has to be larger than 0");
63 static_assert(t_b < 64, "dac_vector: t_b has to be smaller than 64");
64
65public:
72 typedef const pointer const_pointer;
74 typedef ptrdiff_t difference_type;
75 typedef t_rank rank_support_type;
77
78private:
79 int_vector<t_b> m_data; // block data for every level
80 bit_vector m_overflow; // mark non-end bytes
81 rank_support_type m_overflow_rank; // rank for m_overflow
82 int_vector<64> m_level_pointer_and_rank = int_vector<64>(4, 0);
83 uint8_t m_max_level; // maximum level < (log n)/b+1
84
85public:
86 dac_vector() = default;
87
89 m_data(v.m_data),
90 m_overflow(v.m_overflow),
91 m_overflow_rank(v.m_overflow_rank),
92 m_level_pointer_and_rank(v.m_level_pointer_and_rank),
93 m_max_level(v.m_max_level)
94 {
95 m_overflow_rank.set_vector(&m_overflow);
96 }
97
99 {
100 *this = std::move(v);
101 }
103 {
104 if (this != &v)
105 {
106 dac_vector tmp(v);
107 *this = std::move(tmp);
108 }
109 return *this;
110 }
111
113 {
114 if (this != &v)
115 {
116 m_data = std::move(v.m_data);
117 m_overflow = std::move(v.m_overflow);
118 m_overflow_rank = std::move(v.m_overflow_rank);
119 m_overflow_rank.set_vector(&m_overflow);
120 m_level_pointer_and_rank = std::move(v.m_level_pointer_and_rank);
121 m_max_level = std::move(v.m_max_level);
122 }
123 return *this;
124 }
125
127
130 template <class Container>
131 dac_vector(Container const & c);
132
134 template <uint8_t int_width>
136
139 {
140 return m_level_pointer_and_rank[2];
141 }
142
144 {
145 return int_vector<>::max_size() / 2;
146 }
147
149 bool empty() const
150 {
151 return 0 == m_level_pointer_and_rank[2];
152 }
153
155 const const_iterator begin() const
156 {
157 return const_iterator(this, 0);
158 }
159
161 const const_iterator end() const
162 {
163 return const_iterator(this, size());
164 }
165
168 {
169 uint8_t level = 1;
170 uint8_t offset = t_b;
171 size_type result = m_data[i];
172 uint64_t const * p = m_level_pointer_and_rank.data();
173 uint64_t ppi = (*p) + i;
174 while (level < m_max_level and m_overflow[ppi])
175 {
176 p += 2;
177 ppi = *p + (m_overflow_rank(ppi) - *(p - 1));
178 result |= (m_data[ppi] << (offset));
179 ++level;
180 offset += t_b;
181 }
182 return result;
183 }
184
186 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const;
187
189 void load(std::istream & in)
190 {
191 m_data.load(in);
192 m_overflow.load(in);
193 m_overflow_rank.load(in, &m_overflow);
194 m_level_pointer_and_rank.load(in);
195 read_member(m_max_level, in);
196 }
197
199 template <typename archive_t>
200 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const;
201
202 template <typename archive_t>
203 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar);
204
205 bool operator==(dac_vector const & v) const
206 {
207 return (m_max_level == v.m_max_level) && (m_data == v.m_data) && (m_overflow == v.m_overflow)
208 && (m_overflow_rank == v.m_overflow_rank) && (m_level_pointer_and_rank == v.m_level_pointer_and_rank);
209 }
210
211 bool operator!=(dac_vector const & v) const
212 {
213 return !(*this == v);
214 }
215};
216
217template <uint8_t t_b, typename t_rank>
218template <class Container>
220{
221 // (1) Count for each level, how many blocks are needed for the representation
222 // Running time: \f$ O(n \times \frac{\log n}{b} \f$
223 // Result is sorted in m_level_pointer_and_rank
224 size_type n = c.size(), val = 0;
225 if (n == 0)
226 return;
227 // initialize counter
228 m_level_pointer_and_rank = int_vector<64>(128, 0);
229 m_level_pointer_and_rank[0] = n; // level 0 has n entries
230
231 uint8_t level_x_2 = 0;
232 uint8_t max_level_x_2 = 4;
233 for (size_type i = 0; i < n; ++i)
234 {
235 val = c[i];
236 val >>= t_b; // shift value b bits to the right
237 level_x_2 = 2;
238 while (val)
239 {
240 // increase counter for current level by 1
241 ++m_level_pointer_and_rank[level_x_2];
242 val >>= t_b; // shift value b bits to the right
243 level_x_2 += 2; // increase level by 1
244 max_level_x_2 = std::max(max_level_x_2, level_x_2);
245 }
246 }
247 m_level_pointer_and_rank.resize(max_level_x_2);
248 // (2) Determine maximum level and prefix sums of level counters
249 m_max_level = 0;
250 size_type sum_blocks = 0, last_block_size = 0;
251 for (size_type i = 0, t = 0; i < m_level_pointer_and_rank.size(); i += 2)
252 {
253 t = sum_blocks;
254 sum_blocks += m_level_pointer_and_rank[i];
255 m_level_pointer_and_rank[i] = t;
256 if (sum_blocks > t)
257 {
258 ++m_max_level;
259 last_block_size = sum_blocks - t;
260 }
261 }
262 m_overflow = bit_vector(sum_blocks - last_block_size, 0);
263 m_data.resize(sum_blocks);
264
265 assert(last_block_size > 0);
266
267 // (3) Enter block and overflow data
268 int_vector<64> cnt = m_level_pointer_and_rank;
269 const uint64_t mask = bits::lo_set[t_b];
270
271 for (size_type i = 0, j = 0; i < n; ++i)
272 {
273 val = c[i];
274 j = cnt[0]++;
275 m_data[j] = val & mask;
276 val >>= t_b; // shift value b bits to the right
277 level_x_2 = 2;
278 while (val)
279 {
280 m_overflow[j] = 1;
281 // increase counter for current level by 1
282 j = cnt[level_x_2]++;
283 m_data[j] = val & mask;
284 val >>= t_b; // shift value b bits to the right
285 level_x_2 += 2; // increase level by 1
286 }
287 }
288
289 // (4) Initialize rank data structure for m_overflow and precalc rank for
290 // pointers
291 util::init_support(m_overflow_rank, &m_overflow);
292 for (size_type i = 0;
293 2 * i < m_level_pointer_and_rank.size() and m_level_pointer_and_rank[2 * i] < m_overflow.size();
294 ++i)
295 {
296 m_level_pointer_and_rank[2 * i + 1] = m_overflow_rank(m_level_pointer_and_rank[2 * i]);
297 }
298}
299
300template <uint8_t t_b, typename t_rank>
301template <uint8_t int_width>
303{
304 // (1) Count for each level, how many blocks are needed for the representation
305 // Running time: \f$ O(n \times \frac{\log n}{b} \f$
306 // Result is sorted in m_level_pointer_and_rank
307 size_type n = v_buf.size(), val = 0;
308 if (n == 0)
309 return;
310 // initialize counter
311 m_level_pointer_and_rank = int_vector<64>(128, 0);
312 m_level_pointer_and_rank[0] = n; // level 0 has n entries
313
314 uint8_t level_x_2 = 0;
315 uint8_t max_level_x_2 = 4;
316 for (size_type i = 0; i < n; ++i)
317 {
318 val = v_buf[i];
319 val >>= t_b; // shift value b bits to the right
320 level_x_2 = 2;
321 while (val)
322 {
323 // increase counter for current level by 1
324 ++m_level_pointer_and_rank[level_x_2];
325 val >>= t_b; // shift value b bits to the right
326 level_x_2 += 2; // increase level by 1
327 max_level_x_2 = std::max(max_level_x_2, level_x_2);
328 }
329 }
330 m_level_pointer_and_rank.resize(max_level_x_2);
331 // (2) Determine maximum level and prefix sums of level counters
332 m_max_level = 0;
333 size_type sum_blocks = 0, last_block_size = 0;
334 for (size_type i = 0, t = 0; i < m_level_pointer_and_rank.size(); i += 2)
335 {
336 t = sum_blocks;
337 sum_blocks += m_level_pointer_and_rank[i];
338 m_level_pointer_and_rank[i] = t;
339 if (sum_blocks > t)
340 {
341 ++m_max_level;
342 last_block_size = sum_blocks - t;
343 }
344 }
345 m_overflow = bit_vector(sum_blocks - last_block_size, 0);
346 m_data.resize(sum_blocks);
347
348 assert(last_block_size > 0);
349
350 // (3) Enter block and overflow data
351 int_vector<64> cnt = m_level_pointer_and_rank;
352 const uint64_t mask = bits::lo_set[t_b];
353
354 for (size_type i = 0, j = 0; i < n; ++i)
355 {
356 val = v_buf[i];
357 j = cnt[0]++;
358 m_data[j] = val & mask;
359 val >>= t_b; // shift value b bits to the right
360 level_x_2 = 2;
361 while (val)
362 {
363 m_overflow[j] = 1;
364 // increase counter for current level by 1
365 j = cnt[level_x_2]++;
366 m_data[j] = val & mask;
367 val >>= t_b; // shift value b bits to the right
368 level_x_2 += 2; // increase level by 1
369 }
370 }
371
372 // (4) Initialize rank data structure for m_overflow and precalc rank for
373 // pointers
374 util::init_support(m_overflow_rank, &m_overflow);
375 for (size_type i = 0;
376 2 * i < m_level_pointer_and_rank.size() and m_level_pointer_and_rank[2 * i] < m_overflow.size();
377 ++i)
378 {
379 m_level_pointer_and_rank[2 * i + 1] = m_overflow_rank(m_level_pointer_and_rank[2 * i]);
380 }
381}
382
383template <uint8_t t_b, typename t_rank>
385dac_vector<t_b, t_rank>::serialize(std::ostream & out, structure_tree_node * v, std::string name) const
386{
387 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
388 size_type written_bytes = 0;
389 written_bytes += m_data.serialize(out, child, "data");
390 written_bytes += m_overflow.serialize(out, child, "overflow");
391 written_bytes += m_overflow_rank.serialize(out, child, "overflow_rank");
392 written_bytes += m_level_pointer_and_rank.serialize(out, child, "level_pointer_and_rank");
393 written_bytes += write_member(m_max_level, out, child, "max_level");
394 structure_tree::add_size(child, written_bytes);
395 return written_bytes;
396}
397
398template <uint8_t t_b, typename t_rank>
399template <typename archive_t>
401{
402 ar(CEREAL_NVP(m_max_level));
403 ar(CEREAL_NVP(m_data));
404 ar(CEREAL_NVP(m_overflow));
405 ar(CEREAL_NVP(m_overflow_rank));
406 ar(CEREAL_NVP(m_level_pointer_and_rank));
407}
408
409template <uint8_t t_b, typename t_rank>
410template <typename archive_t>
412{
413 ar(CEREAL_NVP(m_max_level));
414 ar(CEREAL_NVP(m_data));
415 ar(CEREAL_NVP(m_overflow));
416 ar(CEREAL_NVP(m_overflow_rank));
417 m_overflow_rank.set_vector(&m_overflow);
418 ar(CEREAL_NVP(m_level_pointer_and_rank));
419}
420
421} // end namespace sdsl
422#endif
bits.hpp contains the sdsl::bits class.
cereal.hpp offers cereal support
#define CEREAL_NVP(X)
Definition cereal.hpp:31
#define CEREAL_LOAD_FUNCTION_NAME
Definition cereal.hpp:34
#define CEREAL_SAVE_FUNCTION_NAME
Definition cereal.hpp:35
bool empty() const
Returns if the dac_vector is empty.
const value_type const_reference
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Serialise (save) via cereal.
dac_vector(dac_vector &&v)
dac_vector(dac_vector const &v)
bool operator==(dac_vector const &v) const
dac_vector()=default
dac_vector & operator=(dac_vector &&v)
const_iterator iterator
dac_vector & operator=(dac_vector const &v)
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the dac_vector to a stream.
int_vector ::size_type size_type
t_rank rank_support_type
const const_iterator end() const
Iterator that points to the position after the last element of the dac_vector.
static size_type max_size()
Return the largest size that this container can ever have.
random_access_const_iterator< dac_vector > const_iterator
const_reference reference
size_type size() const
The number of elements in the dac_vector.
void load(std::istream &in)
Load from a stream.
int_vector ::value_type value_type
value_type operator[](size_type i) const
[]-operator
const pointer const_pointer
bool operator!=(dac_vector const &v) const
const const_iterator begin() const
Iterator that points to the first element of the dac_vector.
ptrdiff_t difference_type
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
const_reference * pointer
uint64_t size() const
Returns the number of elements currently stored.
A generic vector class for integers of width .
int_vector_size_type size_type
int_vector_trait< t_width >::value_type value_type
static size_type max_size() noexcept
Maximum size of the int_vector.
Generic iterator for a random access container.
Definition iterators.hpp:24
static structure_tree_node * add_child(structure_tree_node *v, std::string const &name, std::string const &type)
static void add_size(structure_tree_node *v, uint64_t value)
int_vector.hpp contains the sdsl::int_vector class.
int_vector_buffer.hpp contains the sdsl::int_vector_buffer class.
io.hpp contains some methods for reading/writing sdsl structures.
iterators.hpp contains an generic iterator for random access containers.
Namespace for the succinct data structure library.
size_t write_member(T const &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition io.hpp:94
void read_member(T &t, std::istream &in)
Definition io.hpp:119
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
rank_support_v5.hpp contains rank_support_v5.5
Contains declarations and definitions of data structure concepts.
static constexpr uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
Definition bits.hpp:194
structure_tree.hpp contains a helper class which can represent the memory structure of a class.
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.