Skip to content

Commit

Permalink
Support Utf8View in JSON serialization (#6651)
Browse files Browse the repository at this point in the history
  • Loading branch information
jonmmease authored Oct 30, 2024
1 parent 5dd2b47 commit 1621350
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 5 deletions.
12 changes: 12 additions & 0 deletions arrow-json/src/writer/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ fn make_encoder_impl<'a>(
let array = array.as_string::<i64>();
(Box::new(StringEncoder(array)) as _, array.nulls().cloned())
}
DataType::Utf8View => {
let array = array.as_string_view();
(Box::new(StringViewEncoder(array)) as _, array.nulls().cloned())
}
DataType::List(_) => {
let array = array.as_list::<i32>();
(Box::new(ListEncoder::try_new(array, options)?) as _, array.nulls().cloned())
Expand Down Expand Up @@ -311,6 +315,14 @@ impl<O: OffsetSizeTrait> Encoder for StringEncoder<'_, O> {
}
}

/// JSON [`Encoder`] over a borrowed [`StringViewArray`]; constructed for
/// `DataType::Utf8View` columns.
struct StringViewEncoder<'a>(&'a StringViewArray);

impl Encoder for StringViewEncoder<'_> {
    /// Looks up the string stored at row `idx` and hands it to
    /// `encode_string`, which appends its encoded form to `out`.
    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
        let value = self.0.value(idx);
        encode_string(value, out);
    }
}

struct ListEncoder<'a, O: OffsetSizeTrait> {
offsets: OffsetBuffer<O>,
nulls: Option<NullBuffer>,
Expand Down
16 changes: 11 additions & 5 deletions arrow-json/src/writer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -462,16 +462,22 @@ mod tests {
}

#[test]
fn write_large_utf8() {
fn write_large_utf8_and_utf8_view() {
let schema = Schema::new(vec![
Field::new("c1", DataType::Utf8, true),
Field::new("c2", DataType::LargeUtf8, true),
Field::new("c3", DataType::Utf8View, true),
]);

let a = StringArray::from(vec![Some("a"), None, Some("c"), Some("d"), None]);
let b = LargeStringArray::from(vec![Some("a"), Some("b"), None, Some("d"), None]);
let c = StringViewArray::from(vec![Some("a"), Some("b"), None, Some("d"), None]);

let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]).unwrap();
let batch = RecordBatch::try_new(
Arc::new(schema),
vec![Arc::new(a), Arc::new(b), Arc::new(c)],
)
.unwrap();

let mut buf = Vec::new();
{
Expand All @@ -481,10 +487,10 @@ mod tests {

assert_json_eq(
&buf,
r#"{"c1":"a","c2":"a"}
{"c2":"b"}
r#"{"c1":"a","c2":"a","c3":"a"}
{"c2":"b","c3":"b"}
{"c1":"c"}
{"c1":"d","c2":"d"}
{"c1":"d","c2":"d","c3":"d"}
{}
"#,
);
Expand Down

0 comments on commit 1621350

Please sign in to comment.