Skip to content

Commit 44f5374

Browse files
committed
handle data batches with too many user ids, handle batches with missing sprite_id
1 parent 2aa2546 commit 44f5374

File tree

1 file changed

+39
-48
lines changed

1 file changed

+39
-48
lines changed

large-scale-viz/high_perf_character_render/src/data/parquet_reader.rs

Lines changed: 39 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -120,29 +120,16 @@ impl ParquetReader {
120120
let user_col = batch
121121
.column_by_name("user")
122122
.context("Missing user column")?;
123-
let user_dict = user_col
124-
.as_any()
125-
.downcast_ref::<DictionaryArray<Int8Type>>()
126-
.context("Invalid user column type")?;
127-
let user_values = user_dict
128-
.values()
129-
.as_any()
130-
.downcast_ref::<StringArray>()
131-
.context("Invalid user values type")?;
132123

133124
let env_id_col = batch
134125
.column_by_name("env_id")
135126
.context("Missing env_id column")?;
136127

137-
let sprite_id_col = batch
128+
// sprite_id column is optional - some files may not have it
129+
// Default to 0 if missing (or if it is not an Int8-keyed dictionary)
130+
let sprite_id_dict_opt = batch
138131
.column_by_name("sprite_id")
139-
.context("Missing sprite_id column")?;
140-
141-
// sprite_id is a Dictionary<Int8, Float64 or String>
142-
let sprite_id_dict = sprite_id_col
143-
.as_any()
144-
.downcast_ref::<DictionaryArray<Int8Type>>()
145-
.context("Invalid sprite_id column type")?;
132+
.and_then(|col| col.as_any().downcast_ref::<DictionaryArray<Int8Type>>());
146133

147134
// Skip color and extra - they're not used in extraction
148135

@@ -175,11 +162,10 @@ impl ParquetReader {
175162
}
176163

177164
// Extract user - skip row if null
178-
if user_dict.is_null(i) {
179-
continue;
180-
}
181-
let user_key = user_dict.key(i).context("Invalid user key")?;
182-
let user = user_values.value(user_key as usize).to_string();
165+
let user = match get_dict_string(user_col.as_ref(), i)? {
166+
Some(s) => s,
167+
None => continue,
168+
};
183169

184170
// Apply user filter
185171
if let Some(regex) = &self.filter.user_regex {
@@ -195,35 +181,40 @@ impl ParquetReader {
195181
};
196182

197183
// Extract sprite_id - match JS logic exactly:
198-
// Default to 0, and only use value if > 0 and < 50
199-
let sprite_id = if sprite_id_dict.is_null(i) {
200-
0
201-
} else {
202-
let key = sprite_id_dict.key(i).context("Invalid sprite_id key")?;
203-
let sprite_id_raw = if let Some(float_values) = sprite_id_dict
204-
.values()
205-
.as_any()
206-
.downcast_ref::<Float64Array>()
207-
{
208-
// Float64 values
209-
float_values.value(key as usize) as i32
210-
} else if let Some(string_values) = sprite_id_dict
211-
.values()
212-
.as_any()
213-
.downcast_ref::<StringArray>()
214-
{
215-
// String values - parse to int
216-
string_values.value(key as usize).parse::<i32>().unwrap_or(0)
217-
} else {
218-
// Unknown type, default to 0
184+
// Default to 0 if column missing, null, or value out of range
185+
let sprite_id = if let Some(sprite_id_dict) = sprite_id_dict_opt {
186+
if sprite_id_dict.is_null(i) {
219187
0
220-
};
221-
222-
if sprite_id_raw > 0 && sprite_id_raw < 50 {
223-
sprite_id_raw as u8
224188
} else {
225-
0
189+
let key = sprite_id_dict.key(i).context("Invalid sprite_id key")?;
190+
let sprite_id_raw = if let Some(float_values) = sprite_id_dict
191+
.values()
192+
.as_any()
193+
.downcast_ref::<Float64Array>()
194+
{
195+
// Float64 values
196+
float_values.value(key as usize) as i32
197+
} else if let Some(string_values) = sprite_id_dict
198+
.values()
199+
.as_any()
200+
.downcast_ref::<StringArray>()
201+
{
202+
// String values - parse to int
203+
string_values.value(key as usize).parse::<i32>().unwrap_or(0)
204+
} else {
205+
// Unknown type, default to 0
206+
0
207+
};
208+
209+
if sprite_id_raw > 0 && sprite_id_raw < 50 {
210+
sprite_id_raw as u8
211+
} else {
212+
0
213+
}
226214
}
215+
} else {
216+
// Column doesn't exist (or isn't an Int8-keyed dictionary), default to 0
217+
0
227218
};
228219

229220
// Skip color and extra - not used

0 commit comments

Comments
 (0)