From 4cd04cc8725fb0010a279cbcda7ea42d8ec58943 Mon Sep 17 00:00:00 2001
From: Piotr Sarna
Date: Tue, 29 Mar 2022 08:32:44 +0200
Subject: [PATCH] decoder: handle empty non-string types

For legacy reasons, non-string types support "empty" values, e.g. you can have
an empty int, distinct from NULL. This case used to trigger deserialization errors,
so now it's simply a fallback to NULL. Another option is to introduce a new "empty"
state that a value can be in, but it's semantically no different from NULL,
even though it's serialized in a different way.

Ref: https://github.com/apache/cassandra/blob/7b58b79fe24c612f8bbf7984c325e88496f482d6/doc/native_protocol_v4.spec#L861-L863
---
 src/decoder.cpp                       | 10 ++++++
 tests/src/unit/tests/test_decoder.cpp | 46 +++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)

diff --git a/src/decoder.cpp b/src/decoder.cpp
index 01cf38ae6..a9dc55acf 100644
--- a/src/decoder.cpp
+++ b/src/decoder.cpp
@@ -149,6 +149,16 @@ Value Decoder::decode_value(const DataType::ConstPtr& data_type) {
     int32_t count = 0;
 
     if (!data_type->is_collection()) {
+      if (!is_string_type(data_type->value_type()) && !is_bytes_type(data_type->value_type())
+          && size == 0) {
+        // For legacy reasons, non-string types support "empty" values, e.g. you can have
+        // an empty int, distinct from NULL. This case used to trigger deserialization errors,
+        // so now it's simply a fallback to NULL. Another option is to introduce a new "empty"
+        // state that a value can be in, but it's semantically no different from NULL,
+        // even though it's serialized in a different way.
+        // Ref: https://github.com/apache/cassandra/blob/7b58b79fe24c612f8bbf7984c325e88496f482d6/doc/native_protocol_v4.spec#L861-L863
+        return Value(data_type);
+      }
       return Value(data_type, decoder);
     } else if (decoder.decode_int32(count)) {
       return Value(data_type, count, decoder);
diff --git a/tests/src/unit/tests/test_decoder.cpp b/tests/src/unit/tests/test_decoder.cpp
index e75856bec..ac372f31a 100644
--- a/tests/src/unit/tests/test_decoder.cpp
+++ b/tests/src/unit/tests/test_decoder.cpp
@@ -17,6 +17,7 @@
 #include <gtest/gtest.h>
 
 #include "decoder.hpp"
+#include "value.hpp"
 #include "logger.hpp"
 
 using namespace datastax;
@@ -1002,3 +1003,48 @@ TEST_F(DecoderUnitTest, DecodeWarnings) {
   ASSERT_FALSE(decoder.decode_warnings(value));
   ASSERT_TRUE(failure_logged_);
 }
+
+TEST_F(DecoderUnitTest, DecodeEmpty) {
+  CassValueType nonzero_length_scalars[] = {
+    CASS_VALUE_TYPE_BIGINT,
+    CASS_VALUE_TYPE_BOOLEAN,
+    CASS_VALUE_TYPE_COUNTER,
+    CASS_VALUE_TYPE_DECIMAL,
+    CASS_VALUE_TYPE_DOUBLE,
+    CASS_VALUE_TYPE_FLOAT,
+    CASS_VALUE_TYPE_INT,
+    CASS_VALUE_TYPE_TIMESTAMP,
+    CASS_VALUE_TYPE_UUID,
+    CASS_VALUE_TYPE_TIMEUUID,
+    CASS_VALUE_TYPE_INET,
+    CASS_VALUE_TYPE_DATE,
+    CASS_VALUE_TYPE_TIME,
+    CASS_VALUE_TYPE_SMALL_INT,
+    CASS_VALUE_TYPE_TINY_INT,
+    CASS_VALUE_TYPE_DURATION,
+  };
+  CassValueType variable_length_scalars[] = {
+    CASS_VALUE_TYPE_CUSTOM,
+    CASS_VALUE_TYPE_ASCII,
+    CASS_VALUE_TYPE_BLOB,
+    CASS_VALUE_TYPE_TEXT,
+    CASS_VALUE_TYPE_VARCHAR,
+    CASS_VALUE_TYPE_VARINT,
+  };
+  for (CassValueType t : nonzero_length_scalars) {
+    const char input[4] = { 0, 0, 0, 0 };
+    TestDecoder decoder((const char*)input, 4);
+    DataType::ConstPtr type_ptr(new DataType(t));
+    Value val = decoder.decode_value(type_ptr);
+    // Empty types are deserialized without errors and treated as null
+    ASSERT_TRUE(val.is_null());
+  }
+  for (CassValueType t : variable_length_scalars) {
+    const char input[4] = { 0, 0, 0, 0 };
+    TestDecoder decoder((const char*)input, 4);
+    DataType::ConstPtr type_ptr(new DataType(t));
+    Value val = decoder.decode_value(type_ptr);
+    // Empty types with variable size are simply empty, not null
+    ASSERT_FALSE(val.is_null());
+  }
+}
\ No newline at end of file