From 6cff848ae4d08ade46a595ae81bef6f373e3db9b Mon Sep 17 00:00:00 2001
From: Frederic Branczyk <fbranczyk@gmail.com>
Date: Mon, 23 Sep 2024 18:01:44 +0200
Subject: [PATCH] arrow-ipc: Read dictionary IDs from dictionary tracker in
 correct order

When dictionary IDs are not preserved, then they are assigned depth
first, however, when reading them from the dictionary tracker to write
the IPC bytes, they were previously read from the dictionary tracker in
the order that the schema is traversed (first come first serve), which
caused an incorrect order of dictionaries serialized in IPC.
---
 arrow-ipc/src/writer.rs | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs
index 6970b782763c..d5971e49fb58 100644
--- a/arrow-ipc/src/writer.rs
+++ b/arrow-ipc/src/writer.rs
@@ -364,13 +364,6 @@ impl IpcDataGenerator {
     ) -> Result<(), ArrowError> {
         match column.data_type() {
             DataType::Dictionary(_key_type, _value_type) => {
-                let dict_id = dict_id_seq
-                    .next()
-                    .or_else(|| field.dict_id())
-                    .ok_or_else(|| {
-                        ArrowError::IpcError(format!("no dict id for field {}", field.name()))
-                    })?;
-
                 let dict_data = column.to_data();
                 let dict_values = &dict_data.child_data()[0];
 
@@ -384,6 +377,16 @@ impl IpcDataGenerator {
                     dict_id_seq,
                 )?;
 
+                // It's importnat to only take the dict_id at this point, because the dict ID
+                // sequence is assigned depth-first, so we need to first encode children and have
+                // them take their assigned dict IDs before we take the dict ID for this field.
+                let dict_id = dict_id_seq
+                    .next()
+                    .or_else(|| field.dict_id())
+                    .ok_or_else(|| {
+                        ArrowError::IpcError(format!("no dict id for field {}", field.name()))
+                    })?;
+
                 let emit = dictionary_tracker.insert(dict_id, column)?;
 
                 if emit {