Skip to content

Commit

Permalink
FlatBuffers 64 for C++ (google#7935)
Browse files Browse the repository at this point in the history
* First working hack of adding 64-bit. Don't judge :)

* Made vector_downward work on 64 bit types

* vector_downward uses size_t, added offset64 to reflection

* cleaned up adding offset64 in parser

* Add C++ testing skeleton for 64-bit

* working test for CreateVector64

* working >2 GiB buffers

* support for large strings

* simplified CreateString<> to just provide the offset type

* generalize CreateVector template

* update test_64.afb due to upstream format change

* Added Vector64 type, which is just an alias for vector ATM

* Switch to Offset64 for Vector64

* Update for reflection bfbs output change

* Starting to add support for vector64 type in C++

* made a generic CreateVector that can handle different offsets and vector types

* Support for 32-vector with 64-addressing

* Vector64 basic builder + tests working

* basic support for json vector64 support

* renamed fields in test_64bit.fbs to better reflect their use

* working C++ vector64 builder

* Apply --annotate-sparse-vector to 64-bit tests

* Enable Vector64 for --annotate-sparse-vectors

* Merged from upstream

* Add `near_string` field for testing 32-bit offsets alongside

* keep track of where the 32-bit and 64-bit regions are for flatbufferbuilder

* move template<> outside class body for GCC

* update run.sh to build and run tests

* basic assertion for adding 64-bit offset at the wrong time

* started to separate `FlatBufferBuilder` into two classes, 1 64-bit aware, the other not

* add test for nested flatbuffer vector64, fix bug in alignment of big vectors

* fixed CreateDirect method by iterating by Offset64 first

* internal refactoring of flatbufferbuilder

* block not supported languages in the parser from using 64-bit

* evolution tests for adding a vector64 field

* conformity tests for adding/removing offset64 attributes

* ensure test is for a big buffer

* add parser error tests for `offset64` and `vector64` attributes

* add missing static that GCC only complains about

* remove stdint-uintn.h header that gets automatically added

* move 64-bit CalculateOffset internal

* fixed return size of EndVector

* various fixes on windows

* add SizeT to vector_downward

* minimze range of size changes in vector and builder

* reworked how tracking if 64-offsets are added

* Add ReturnT to EndVector

* small cleanups

* remove need for second Array definition

* combine IndirectHelpers into one definition

* started support for vector of struct

* Support for 32/64-vectors of structs + Offset64

* small cleanups

* add verification for vector64

* add sized prefix for 64-bit buffers

* add fuzzer for 64-bit

* add example of adding many vectors using a wrapper table

* run the new -bfbs-gen-embed logic on the 64-bit tests

* remove run.sh and fix cmakelist issue

* fixed bazel rules

* fixed some PR comments

* add 64-bit tests to cmakelist
  • Loading branch information
dbaileychess authored and Jochen Parmentier committed Oct 29, 2024
1 parent 26c05fe commit 0f7c568
Show file tree
Hide file tree
Showing 49 changed files with 3,270 additions and 525 deletions.
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,8 @@ set(FlatBuffers_Tests_SRCS
tests/native_type_test_impl.cpp
tests/alignment_test.h
tests/alignment_test.cpp
tests/64bit/offset64_test.h
tests/64bit/offset64_test.cpp
include/flatbuffers/code_generators.h
src/code_generators.cpp
)
Expand Down Expand Up @@ -527,6 +529,9 @@ if(FLATBUFFERS_BUILD_TESTS)
compile_schema_for_test(tests/native_inline_table_test.fbs "${FLATC_OPT_COMP}")
compile_schema_for_test(tests/native_type_test.fbs "${FLATC_OPT}")
compile_schema_for_test(tests/key_field/key_field_sample.fbs "${FLATC_OPT_COMP}")
compile_schema_for_test(tests/64bit/test_64bit.fbs "${FLATC_OPT_COMP};--bfbs-gen-embed")
compile_schema_for_test(tests/64bit/evolution/v1.fbs "${FLATC_OPT_COMP}")
compile_schema_for_test(tests/64bit/evolution/v2.fbs "${FLATC_OPT_COMP}")

if(FLATBUFFERS_CODE_SANITIZE)
add_fsanitize_to_target(flattests ${FLATBUFFERS_CODE_SANITIZE})
Expand Down
7 changes: 5 additions & 2 deletions include/flatbuffers/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#ifndef FLATBUFFERS_ARRAY_H_
#define FLATBUFFERS_ARRAY_H_

#include <cstdint>
#include <memory>

#include "flatbuffers/base.h"
Expand All @@ -37,7 +38,7 @@ template<typename T, uint16_t length> class Array {
public:
typedef uint16_t size_type;
typedef typename IndirectHelper<IndirectHelperType>::return_type return_type;
typedef VectorConstIterator<T, return_type> const_iterator;
typedef VectorConstIterator<T, return_type, uoffset_t> const_iterator;
typedef VectorReverseIterator<const_iterator> const_reverse_iterator;

// If T is a LE-scalar or a struct (!scalar_tag::value).
Expand Down Expand Up @@ -158,11 +159,13 @@ template<typename T, uint16_t length> class Array {

// Specialization for Array[struct] with access using Offset<void> pointer.
// This specialization used by idl_gen_text.cpp.
template<typename T, uint16_t length> class Array<Offset<T>, length> {
template<typename T, uint16_t length, template<typename> class OffsetT>
class Array<OffsetT<T>, length> {
static_assert(flatbuffers::is_same<T, void>::value, "unexpected type T");

public:
typedef const void *return_type;
typedef uint16_t size_type;

const uint8_t *Data() const { return data_; }

Expand Down
6 changes: 5 additions & 1 deletion include/flatbuffers/base.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include <vector>
#include <set>
#include <algorithm>
#include <limits>
#include <iterator>
#include <memory>

Expand Down Expand Up @@ -323,9 +324,11 @@ namespace flatbuffers {
// Also, using a consistent offset type maintains compatibility of serialized
// offset values between 32bit and 64bit systems.
typedef uint32_t uoffset_t;
typedef uint64_t uoffset64_t;

// Signed offsets for references that can go in both directions.
typedef int32_t soffset_t;
typedef int64_t soffset64_t;

// Offset/index used in v-tables, can be changed to uint8_t in
// format forks to save a bit of space if desired.
Expand All @@ -334,7 +337,8 @@ typedef uint16_t voffset_t;
typedef uintmax_t largest_scalar_t;

// In 32bits, this evaluates to 2GB - 1
#define FLATBUFFERS_MAX_BUFFER_SIZE ((1ULL << (sizeof(::flatbuffers::soffset_t) * 8 - 1)) - 1)
#define FLATBUFFERS_MAX_BUFFER_SIZE std::numeric_limits<::flatbuffers::soffset_t>::max()
#define FLATBUFFERS_MAX_64_BUFFER_SIZE std::numeric_limits<::flatbuffers::soffset64_t>::max()

// The minimum size buffer that can be a valid flatbuffer.
// Includes the offset to the root table (uoffset_t), the offset to the vtable
Expand Down
93 changes: 69 additions & 24 deletions include/flatbuffers/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,33 @@ namespace flatbuffers {

// Wrapper for uoffset_t to allow safe template specialization.
// Value is allowed to be 0 to indicate a null object (see e.g. AddOffset).
template<typename T> struct Offset {
uoffset_t o;
template<typename T = void> struct Offset {
// The type of offset to use.
typedef uoffset_t offset_type;

offset_type o;
Offset() : o(0) {}
Offset(uoffset_t _o) : o(_o) {}
Offset<void> Union() const { return Offset<void>(o); }
Offset(const offset_type _o) : o(_o) {}
Offset<> Union() const { return o; }
bool IsNull() const { return !o; }
};

// Wrapper for uoffset64_t Offsets.
template<typename T = void> struct Offset64 {
// The type of offset to use.
typedef uoffset64_t offset_type;

offset_type o;
Offset64() : o(0) {}
Offset64(const offset_type offset) : o(offset) {}
Offset64<> Union() const { return o; }
bool IsNull() const { return !o; }
};

// Litmus check for ensuring the Offsets are the expected size.
static_assert(sizeof(Offset<>) == 4, "Offset has wrong size");
static_assert(sizeof(Offset64<>) == 8, "Offset64 has wrong size");

inline void EndianCheck() {
int endiantest = 1;
// If this fails, see FLATBUFFERS_LITTLEENDIAN above.
Expand Down Expand Up @@ -75,35 +94,59 @@ template<typename T> struct IndirectHelper {
typedef T return_type;
typedef T mutable_return_type;
static const size_t element_stride = sizeof(T);
static return_type Read(const uint8_t *p, uoffset_t i) {

static return_type Read(const uint8_t *p, const size_t i) {
return EndianScalar((reinterpret_cast<const T *>(p))[i]);
}
static return_type Read(uint8_t *p, uoffset_t i) {
return Read(const_cast<const uint8_t *>(p), i);
static mutable_return_type Read(uint8_t *p, const size_t i) {
return reinterpret_cast<mutable_return_type>(
Read(const_cast<const uint8_t *>(p), i));
}
};
template<typename T> struct IndirectHelper<Offset<T>> {

// For vector of Offsets.
template<typename T, template<typename> class OffsetT>
struct IndirectHelper<OffsetT<T>> {
typedef const T *return_type;
typedef T *mutable_return_type;
static const size_t element_stride = sizeof(uoffset_t);
static return_type Read(const uint8_t *p, uoffset_t i) {
p += i * sizeof(uoffset_t);
return reinterpret_cast<return_type>(p + ReadScalar<uoffset_t>(p));
typedef typename OffsetT<T>::offset_type offset_type;
static const offset_type element_stride = sizeof(offset_type);

static return_type Read(const uint8_t *const p, const offset_type i) {
// Offsets are relative to themselves, so first update the pointer to
// point to the offset location.
const uint8_t *const offset_location = p + i * element_stride;

// Then read the scalar value of the offset (which may be 32 or 64-bits) and
// then determine the relative location from the offset location.
return reinterpret_cast<return_type>(
offset_location + ReadScalar<offset_type>(offset_location));
}
static mutable_return_type Read(uint8_t *p, uoffset_t i) {
p += i * sizeof(uoffset_t);
return reinterpret_cast<mutable_return_type>(p + ReadScalar<uoffset_t>(p));
static mutable_return_type Read(uint8_t *const p, const offset_type i) {
// Offsets are relative to themselves, so first update the pointer to
// point to the offset location.
uint8_t *const offset_location = p + i * element_stride;

// Then read the scalar value of the offset (which may be 32 or 64-bits) and
// then determine the relative location from the offset location.
return reinterpret_cast<mutable_return_type>(
offset_location + ReadScalar<offset_type>(offset_location));
}
};

// For vector of structs.
template<typename T> struct IndirectHelper<const T *> {
typedef const T *return_type;
typedef T *mutable_return_type;
static const size_t element_stride = sizeof(T);
static return_type Read(const uint8_t *p, uoffset_t i) {
return reinterpret_cast<return_type>(p + i * sizeof(T));

static return_type Read(const uint8_t *const p, const size_t i) {
// Structs are stored inline, relative to the first struct pointer.
return reinterpret_cast<return_type>(p + i * element_stride);
}
static mutable_return_type Read(uint8_t *p, uoffset_t i) {
return reinterpret_cast<mutable_return_type>(p + i * sizeof(T));
static mutable_return_type Read(uint8_t *const p, const size_t i) {
// Structs are stored inline, relative to the first struct pointer.
return reinterpret_cast<mutable_return_type>(p + i * element_stride);
}
};

Expand All @@ -130,23 +173,25 @@ inline bool BufferHasIdentifier(const void *buf, const char *identifier,
/// @cond FLATBUFFERS_INTERNAL
// Helpers to get a typed pointer to the root object contained in the buffer.
template<typename T> T *GetMutableRoot(void *buf) {
if (!buf) return nullptr;
EndianCheck();
return reinterpret_cast<T *>(
reinterpret_cast<uint8_t *>(buf) +
EndianScalar(*reinterpret_cast<uoffset_t *>(buf)));
}

template<typename T> T *GetMutableSizePrefixedRoot(void *buf) {
return GetMutableRoot<T>(reinterpret_cast<uint8_t *>(buf) +
sizeof(uoffset_t));
template<typename T, typename SizeT = uoffset_t>
T *GetMutableSizePrefixedRoot(void *buf) {
return GetMutableRoot<T>(reinterpret_cast<uint8_t *>(buf) + sizeof(SizeT));
}

template<typename T> const T *GetRoot(const void *buf) {
return GetMutableRoot<T>(const_cast<void *>(buf));
}

template<typename T> const T *GetSizePrefixedRoot(const void *buf) {
return GetRoot<T>(reinterpret_cast<const uint8_t *>(buf) + sizeof(uoffset_t));
template<typename T, typename SizeT = uoffset_t>
const T *GetSizePrefixedRoot(const void *buf) {
return GetRoot<T>(reinterpret_cast<const uint8_t *>(buf) + sizeof(SizeT));
}

} // namespace flatbuffers
Expand Down
Loading

0 comments on commit 0f7c568

Please sign in to comment.