-
-
Notifications
You must be signed in to change notification settings - Fork 7k
Add keep parameter to dump to copy invalid UTF-8 bytes as-is #4555
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from 15 commits
4d67e12
851584e
3665dab
1a76a2c
3db5cc4
9f73bc1
a27a5b5
a2d828c
7d2a83b
a6a06b7
3cd5025
15ff370
e9876d9
b167096
493d1e4
4ab98c3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,7 +44,8 @@ enum class error_handler_t | |
{ | ||
strict, ///< throw a type_error exception in case of invalid UTF-8 | ||
replace, ///< replace invalid UTF-8 sequences with U+FFFD | ||
ignore ///< ignore invalid UTF-8 sequences | ||
ignore, ///< ignore invalid UTF-8 sequences | ||
keep ///< keep invalid UTF-8 sequences | ||
}; | ||
|
||
template<typename BasicJsonType> | ||
|
@@ -398,6 +399,13 @@ class serializer | |
std::size_t bytes_after_last_accept = 0; | ||
std::size_t undumped_chars = 0; | ||
|
||
// copy string as-is if error handler is set to keep, and we don't want to ensure ASCII | ||
if (error_handler == error_handler_t::keep && !ensure_ascii) | ||
{ | ||
o->write_characters(s.data(), s.size()); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just for me to understand, how would this behave exactly? If there is a […] I think the important thing is to not break the JSON format. And also, what about other UTF-8 accepted chars? Like […] I have limited access these days (from mobile), and I don't know exactly the purpose of […] Thank you. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oh, you're right! Just copying the input to the output is wrong here, because valid characters like LF, which must be escaped to `\n`, would be written unescaped. |
||
return; | ||
} | ||
|
||
for (std::size_t i = 0; i < s.size(); ++i) | ||
{ | ||
const auto byte = static_cast<std::uint8_t>(s[i]); | ||
|
@@ -567,7 +575,23 @@ class serializer | |
break; | ||
} | ||
|
||
default: // LCOV_EXCL_LINE | ||
case error_handler_t::keep: | ||
{ | ||
// copy undumped chars to string buffer | ||
for (std::size_t j = 0; j < undumped_chars; ++j) | ||
{ | ||
string_buffer[bytes++] = s[bytes_after_last_accept + j]; | ||
} | ||
|
||
// add erroneous byte to string buffer | ||
string_buffer[bytes++] = s[i]; | ||
|
||
// continue processing the string | ||
state = UTF8_ACCEPT; | ||
break; | ||
} | ||
|
||
default: // LCOV_EXCL_LINE | ||
JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE | ||
} | ||
break; | ||
|
@@ -605,6 +629,20 @@ class serializer | |
JSON_THROW(type_error::create(316, concat("incomplete UTF-8 string; last byte: 0x", hex_bytes(static_cast<std::uint8_t>(s.back() | 0))), nullptr)); | ||
} | ||
|
||
case error_handler_t::keep: | ||
{ | ||
// copy undumped chars to string buffer | ||
for (std::size_t j = 0; j < undumped_chars; ++j) | ||
{ | ||
string_buffer[bytes++] = s[bytes_after_last_accept + j]; | ||
} | ||
undumped_chars = 0; | ||
|
||
// write all accepted bytes | ||
o->write_characters(string_buffer.data(), bytes); | ||
break; | ||
} | ||
|
||
case error_handler_t::ignore: | ||
{ | ||
// write all accepted bytes | ||
|
@@ -628,8 +666,8 @@ class serializer | |
break; | ||
} | ||
|
||
default: // LCOV_EXCL_LINE | ||
JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE | ||
default: // LCOV_EXCL_LINE | ||
JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE | ||
} | ||
} | ||
} | ||
|
Uh oh!
There was an error while loading. Please reload this page.