forked from jcrobak/parquet-python
-
-
Notifications
You must be signed in to change notification settings - Fork 188
Open
Description
Describe the issue: `to_pandas()` crashes when it encounters NA values in an INT64 OPTIONAL column (`cycle` in this parquet file).
Minimal Complete Verifiable Example:
import fastparquet as fp
pf = fp.ParquetFile("ffg_partial.parquet")
pf.to_pandas()
TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NAType'
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[71], [line 1](vscode-notebook-cell:?execution_count=71&line=1)
----> [1](vscode-notebook-cell:?execution_count=71&line=1) df = pf.to_pandas()
File /projects/europa/users/brechtm/clean/.venv/lib/python3.12/site-packages/fastparquet/api.py:790, in ParquetFile.to_pandas(self, columns, categories, filters, index, row_filter, dtypes)
[786](.../fastparquet/api.py:786) continue
[787](.../fastparquet/api.py:787) parts = {name: (v if name.endswith('-catdef')
[788](.../fastparquet/api.py:788) else v[start:start + thislen])
[789](.../fastparquet/api.py:789) for (name, v) in views.items()}
--> [790](.../fastparquet/api.py:790) self.read_row_group_file(rg, columns, categories, index,
[791](.../fastparquet/api.py:791) assign=parts, partition_meta=self.partition_meta,
[792](.../fastparquet/api.py:792) row_filter=sel, infile=infile)
[793](.../fastparquet/api.py:793) start += thislen
[794](.../fastparquet/api.py:794) return df
File /projects/europa/users/brechtm/clean/.venv/lib/python3.12/site-packages/fastparquet/api.py:388, in ParquetFile.read_row_group_file(self, rg, columns, categories, index, assign, partition_meta, row_filter, infile)
[385](.../fastparquet/api.py:385) ret = True
[386](.../fastparquet/api.py:386) f = infile or self.open(fn, mode='rb')
--> [388](.../fastparquet/api.py:388) core.read_row_group(
[389](.../fastparquet/api.py:389) f, rg, columns, categories, self.schema, self.cats,
[390](.../fastparquet/api.py:390) selfmade=self.selfmade, index=index,
[391](.../fastparquet/api.py:391) assign=assign, scheme=self.file_scheme, partition_meta=partition_meta,
[392](.../fastparquet/api.py:392) row_filter=row_filter
[393](.../fastparquet/api.py:393) )
[394](.../fastparquet/api.py:394) if ret:
[395](.../fastparquet/api.py:395) return df
File /projects/europa/users/brechtm/clean/.venv/lib/python3.12/site-packages/fastparquet/core.py:644, in read_row_group(file, rg, columns, categories, schema_helper, cats, selfmade, index, assign, scheme, partition_meta, row_filter)
[642](.../fastparquet/core.py:642) if assign is None:
[643](.../fastparquet/core.py:643) raise RuntimeError('Going with pre-allocation!')
--> [644](.../fastparquet/core.py:644) read_row_group_arrays(file, rg, columns, categories, schema_helper,
[645](.../fastparquet/core.py:645) cats, selfmade, assign=assign, row_filter=row_filter)
[647](.../fastparquet/core.py:647) for cat in cats:
[648](.../fastparquet/core.py:648) if cat not in assign:
[649](.../fastparquet/core.py:649) # do no need to have partition columns in output
File /projects/europa/users/brechtm/clean/.venv/lib/python3.12/site-packages/fastparquet/core.py:615, in read_row_group_arrays(file, rg, columns, categories, schema_helper, cats, selfmade, assign, row_filter)
[612](.../fastparquet/core.py:612) continue
[613](.../fastparquet/core.py:613) remains.discard(name)
--> [615](.../fastparquet/core.py:615) read_col(column, schema_helper, file, use_cat=name+'-catdef' in out,
[616](.../fastparquet/core.py:616) selfmade=selfmade, assign=out[name],
[617](.../fastparquet/core.py:617) catdef=out.get(name+'-catdef', None),
[618](.../fastparquet/core.py:618) row_filter=row_filter)
[620](.../fastparquet/core.py:620) if _is_map_like(schema_helper, column):
[621](.../fastparquet/core.py:621) # TODO: could be done in fast loop in _assemble_objects?
[622](.../fastparquet/core.py:622) if name not in maps:
File /projects/europa/users/brechtm/clean/.venv/lib/python3.12/site-packages/fastparquet/core.py:561, in read_col(column, schema_helper, infile, use_cat, selfmade, assign, catdef, row_filter)
[559](.../fastparquet/core.py:559) part = part._data
[560](.../fastparquet/core.py:560) elif part.dtype.kind != "O":
--> [561](.../fastparquet/core.py:561) part[defi != max_defi] = my_nan
[562](.../fastparquet/core.py:562) if d and not use_cat:
[563](.../fastparquet/core.py:563) part[defi == max_defi] = dic[val]
TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NAType'
I cannot share the parquet file, so hopefully the following information is sufficient to debug the issue.
> pf.selfmade
True
> pf.dtypes
OrderedDict([('net', 'category'),
('cycle', dtype('int64')),
('edge', 'category'),
('time20', dtype('float64')),
('time50', dtype('float64')),
('time80', dtype('float64')),
('global_variation', 'category'),
('sweep_index', 'category'),
('temp', 'category'),
('vdd', 'category'),
('vdd_imc', 'category'),
('vss', 'category')])
> print(pf.schema)
- schema:
| - net: BYTE_ARRAY, UTF8, OPTIONAL
| - cycle: INT64, OPTIONAL
| - edge: BYTE_ARRAY, UTF8, OPTIONAL
| - time20: DOUBLE, OPTIONAL
| - time50: DOUBLE, OPTIONAL
| - time80: DOUBLE, OPTIONAL
| - global_variation: BYTE_ARRAY, UTF8, OPTIONAL
| - sweep_index: INT64, OPTIONAL
| - temp: INT64, OPTIONAL
| - vdd: DOUBLE, OPTIONAL
| - vdd_imc: DOUBLE, OPTIONAL
- vss: INT64, OPTIONAL
> pf.statistics
{'min': {'net': [None],
'cycle': [2, 0, 2, 0],
'edge': [None],
'time20': [1.4773426876688636e-10,
1.0559080883552939e-10,
7.139149389929003e-11,
9.52384645968221e-11],
'time50': [1.5455092029759065e-10,
1.3213713977167085e-19,
1.672420481250059e-10,
1.3213713977167085e-19],
'time80': [1.473934018947706e-10,
3.0000002099352238e-12,
1.3538265149883599e-10,
3.0000002099352238e-12],
'global_variation': [None],
'sweep_index': [None, None, None, None],
'temp': [None, None, None, None],
'vdd': [None, None, None, None],
'vdd_imc': [None, None, None, None],
'vss': [None, None, None, None]},
'max': {'net': [None],
'cycle': [1024, 1033, 1024, 1033],
'edge': [None],
'time20': [6.524647908841792e-10,
9.998369878836728e-09,
6.761692109951692e-10,
9.99867577012857e-09],
'time50': [6.316129033313601e-10,
6.376947296995981e-10,
6.755620766233663e-10,
6.880282769349157e-10],
'time80': [6.419314096761683e-10,
9.11409428754411e-09,
6.881378599573002e-10,
6.971354202572327e-10],
'global_variation': [None],
'sweep_index': [None, None, None, None],
'temp': [None, None, None, None],
'vdd': [None, None, None, None],
'vdd_imc': [None, None, None, None],
'vss': [None, None, None, None]},
'null_count': {'net': [0, 0, 0, 0],
'cycle': [0, 7, 0, 7],
'edge': [0, 0, 0, 0],
'time20': [3540142, 773119, 2661380, 2569291],
'time50': [5093064, 1212040, 4006276, 3655088],
'time80': [6684602, 1666286, 5130506, 4509842],
'global_variation': [0, 0, 0, 0],
'sweep_index': [0, 0, 0, 0],
'temp': [0, 0, 0, 0],
'vdd': [0, 0, 0, 0],
'vdd_imc': [0, 0, 0, 0],
'vss': [0, 0, 0, 0]},
'distinct_count': {'net': [None, None, None, None],
'cycle': [None, None, None, None],
'edge': [None, None, None, None],
'time20': [None, None, None, None],
'time50': [None, None, None, None],
'time80': [None, None, None, None],
'global_variation': [None, None, None, None],
'sweep_index': [None, None, None, None],
'temp': [None, None, None, None],
'vdd': [None, None, None, None],
'vdd_imc': [None, None, None, None],
'vss': [None, None, None, None]}}
> pf.row_groups
[columns:
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 4
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 1614374
dictionary_page_offset: 4
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'1307603'
- key: b'numpy_dtype'
value: b'int32'
- key: b'label_dtype'
value: b'object'
num_values: 10127116
path_in_schema:
- net
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 2104220
total_uncompressed_size: 166005828
type: 6
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 2104224
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 2104224
dictionary_page_offset: null
encoding_stats:
- count: 1
encoding: 0
page_type: 0
encodings:
- 0
index_page_offset: null
key_value_metadata: []
num_values: 10127116
path_in_schema:
- cycle
statistics:
distinct_count: null
max: b'\x00\x04\x00\x00\x00\x00\x00\x00'
max_value: null
min: b'\x02\x00\x00\x00\x00\x00\x00\x00'
min_value: null
null_count: 0
total_compressed_size: 7843627
total_uncompressed_size: 81016971
type: 2
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 9947851
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 9947889
dictionary_page_offset: 9947851
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'2'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'object'
num_values: 10127116
path_in_schema:
- edge
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 1052716
total_uncompressed_size: 10127193
type: 6
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 11000567
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 11000567
dictionary_page_offset: null
encoding_stats:
- count: 1
encoding: 0
page_type: 0
encodings:
- 0
index_page_offset: null
key_value_metadata: []
num_values: 10127116
path_in_schema:
- time20
statistics:
distinct_count: null
max: b'\x00`4\x18$k\x06>'
max_value: null
min: b'\x00\x80\xdb\xfe\xefM\xe4='
min_value: null
null_count: 3540142
total_compressed_size: 40359107
total_uncompressed_size: 53961724
type: 5
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 51359674
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 51359674
dictionary_page_offset: null
encoding_stats:
- count: 1
encoding: 0
page_type: 0
encodings:
- 0
index_page_offset: null
key_value_metadata: []
num_values: 10127116
path_in_schema:
- time50
statistics:
distinct_count: null
max: b'\x00@k\xd1\xb9\xb3\x05>'
max_value: null
min: b'\x00\xc0"\xef\xc6=\xe5='
min_value: null
null_count: 5093064
total_compressed_size: 31180098
total_uncompressed_size: 41538348
type: 5
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 82539772
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 82539772
dictionary_page_offset: null
encoding_stats:
- count: 1
encoding: 0
page_type: 0
encodings:
- 0
index_page_offset: null
key_value_metadata: []
num_values: 10127116
path_in_schema:
- time80
statistics:
distinct_count: null
max: b'\x00`Y\x07}\x0e\x06>'
max_value: null
min: b'\x00\x00]\xbd\xf1A\xe4='
min_value: null
null_count: 6684602
total_compressed_size: 21626096
total_uncompressed_size: 28806044
type: 5
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 104165868
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 104165897
dictionary_page_offset: 104165868
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'1'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'object'
num_values: 10127116
path_in_schema:
- global_variation
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 393
total_uncompressed_size: 10127182
type: 6
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 104166261
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 104166299
dictionary_page_offset: 104166261
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'2'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'int64'
num_values: 10127116
path_in_schema:
- sweep_index
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 402
total_uncompressed_size: 10127191
type: 2
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 104166663
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 104166701
dictionary_page_offset: 104166663
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'2'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'int64'
num_values: 10127116
path_in_schema:
- temp
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 402
total_uncompressed_size: 10127191
type: 2
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 104167065
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 104167095
dictionary_page_offset: 104167065
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'1'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'float64'
num_values: 10127116
path_in_schema:
- vdd
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 394
total_uncompressed_size: 10127183
type: 5
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 104167459
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 104167489
dictionary_page_offset: 104167459
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'1'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'float64'
num_values: 10127116
path_in_schema:
- vdd_imc
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 394
total_uncompressed_size: 10127183
type: 5
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 104167853
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 104167883
dictionary_page_offset: 104167853
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'1'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'int64'
num_values: 10127116
path_in_schema:
- vss
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 394
total_uncompressed_size: 10127183
type: 2
offset_index_length: null
offset_index_offset: null
file_offset: null
num_rows: 10127116
ordinal: null
sorting_columns: null
total_byte_size: 442219221
total_compressed_size: null,
columns:
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 104168247
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 105782617
dictionary_page_offset: 104168247
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'1307603'
- key: b'numpy_dtype'
value: b'int32'
- key: b'label_dtype'
value: b'object'
num_values: 2412984
path_in_schema:
- net
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 1730307
total_uncompressed_size: 135149299
type: 6
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 105898554
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 105898554
dictionary_page_offset: null
encoding_stats:
- count: 1
encoding: 0
page_type: 0
encodings:
- 0
index_page_offset: null
key_value_metadata: []
num_values: 2412984
path_in_schema:
- cycle
statistics:
distinct_count: null
max: b'\t\x04\x00\x00\x00\x00\x00\x00'
max_value: null
min: b'\x00\x00\x00\x00\x00\x00\x00\x00'
min_value: null
null_count: 7
total_compressed_size: 1892296
total_uncompressed_size: 19605481
type: 2
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 107790850
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 107790888
dictionary_page_offset: 107790850
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'2'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'object'
num_values: 2412984
path_in_schema:
- edge
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 252846
total_uncompressed_size: 2413059
type: 6
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 108043696
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 108043696
dictionary_page_offset: null
encoding_stats:
- count: 1
encoding: 0
page_type: 0
encodings:
- 0
index_page_offset: null
key_value_metadata: []
num_values: 2412984
path_in_schema:
- time20
statistics:
distinct_count: null
max: b'\x00\x98\xe2v\xa9xE>'
max_value: null
min: b'\x00\x00\xc8\xe8J\x06\xdd='
min_value: null
null_count: 773119
total_compressed_size: 10136923
total_uncompressed_size: 13420585
type: 5
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 118180619
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 118180619
dictionary_page_offset: null
encoding_stats:
- count: 1
encoding: 0
page_type: 0
encodings:
- 0
index_page_offset: null
key_value_metadata: []
num_values: 2412984
path_in_schema:
- time50
statistics:
distinct_count: null
max: b'\x00`\x9f\xdf8\xe9\x05>'
max_value: null
min: b'\x00\x00\x00\x00\x00\x80\x03<'
min_value: null
null_count: 1212040
total_compressed_size: 7514835
total_uncompressed_size: 9909217
type: 5
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 125695454
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 125695454
dictionary_page_offset: null
encoding_stats:
- count: 1
encoding: 0
page_type: 0
encodings:
- 0
index_page_offset: null
key_value_metadata: []
num_values: 2412984
path_in_schema:
- time80
statistics:
distinct_count: null
max: b'\x00\xfe\x18\xbd\x86\x92C>'
max_value: null
min: b'\x00\x00\xc0`fc\x8a='
min_value: null
null_count: 1666286
total_compressed_size: 4730693
total_uncompressed_size: 6275249
type: 5
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 130426147
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 130426176
dictionary_page_offset: 130426147
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'1'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'object'
num_values: 2412984
path_in_schema:
- global_variation
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 156
total_uncompressed_size: 2413049
type: 6
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 130426303
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 130426341
dictionary_page_offset: 130426303
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'2'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'int64'
num_values: 2412984
path_in_schema:
- sweep_index
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 165
total_uncompressed_size: 2413058
type: 2
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 130426468
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 130426506
dictionary_page_offset: 130426468
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'2'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'int64'
num_values: 2412984
path_in_schema:
- temp
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 165
total_uncompressed_size: 2413058
type: 2
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 130426633
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 130426663
dictionary_page_offset: 130426633
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'1'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'float64'
num_values: 2412984
path_in_schema:
- vdd
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 157
total_uncompressed_size: 2413050
type: 5
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 130426790
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 130426820
dictionary_page_offset: 130426790
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'1'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'float64'
num_values: 2412984
path_in_schema:
- vdd_imc
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 157
total_uncompressed_size: 2413050
type: 5
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 130426947
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 130426977
dictionary_page_offset: 130426947
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'1'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'int64'
num_values: 2412984
path_in_schema:
- vss
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 157
total_uncompressed_size: 2413050
type: 2
offset_index_length: null
offset_index_offset: null
file_offset: null
num_rows: 2412984
ordinal: null
sorting_columns: null
total_byte_size: 201251205
total_compressed_size: null,
columns:
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 130427104
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 132041474
dictionary_page_offset: 130427104
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'1307603'
- key: b'numpy_dtype'
value: b'int32'
- key: b'label_dtype'
value: b'object'
num_values: 7717453
path_in_schema:
- net
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 1937450
total_uncompressed_size: 156367175
type: 6
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 132364554
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 132364554
dictionary_page_offset: null
encoding_stats:
- count: 1
encoding: 0
page_type: 0
encodings:
- 0
index_page_offset: null
key_value_metadata: []
num_values: 7717453
path_in_schema:
- cycle
statistics:
distinct_count: null
max: b'\x00\x04\x00\x00\x00\x00\x00\x00'
max_value: null
min: b'\x02\x00\x00\x00\x00\x00\x00\x00'
min_value: null
null_count: 0
total_compressed_size: 5820515
total_uncompressed_size: 61739667
type: 2
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 138185069
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 138185107
dictionary_page_offset: 138185069
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'2'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'object'
num_values: 7717453
path_in_schema:
- edge
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 790437
total_uncompressed_size: 7717528
type: 6
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 138975506
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 138975506
dictionary_page_offset: null
encoding_stats:
- count: 1
encoding: 0
page_type: 0
encodings:
- 0
index_page_offset: null
key_value_metadata: []
num_values: 7717453
path_in_schema:
- time20
statistics:
distinct_count: null
max: b'\x00\x00\x0e\xb4\xa5;\x07>'
max_value: null
min: b'\x00\x00@\xd2\xba\x9f\xd3='
min_value: null
null_count: 2661380
total_compressed_size: 30853331
total_uncompressed_size: 41413307
type: 5
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 169828837
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 169828837
dictionary_page_offset: null
encoding_stats:
- count: 1
encoding: 0
page_type: 0
encodings:
- 0
index_page_offset: null
key_value_metadata: []
num_values: 7717453
path_in_schema:
- time50
statistics:
distinct_count: null
max: b'\x00\x00\xeb\x8eN6\x07>'
max_value: null
min: b'\x00\x00\x84sN\xfc\xe6='
min_value: null
null_count: 4006276
total_compressed_size: 22925077
total_uncompressed_size: 30654139
type: 5
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 192753914
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 192753914
dictionary_page_offset: null
encoding_stats:
- count: 1
encoding: 0
page_type: 0
encodings:
- 0
index_page_offset: null
key_value_metadata: []
num_values: 7717453
path_in_schema:
- time80
statistics:
distinct_count: null
max: b'\x00\x80p\xb4\xec\xa4\x07>'
max_value: null
min: b'\x00\x04\x87\x84Z\x9b\xe2='
min_value: null
null_count: 5130506
total_compressed_size: 16229937
total_uncompressed_size: 21660299
type: 5
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 208983851
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 208983880
dictionary_page_offset: 208983851
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'1'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'object'
num_values: 7717453
path_in_schema:
- global_variation
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 316
total_uncompressed_size: 7717518
type: 6
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 208984167
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 208984205
dictionary_page_offset: 208984167
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'2'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'int64'
num_values: 7717453
path_in_schema:
- sweep_index
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 341
total_uncompressed_size: 7717527
type: 2
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 208984508
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 208984546
dictionary_page_offset: 208984508
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'2'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'int64'
num_values: 7717453
path_in_schema:
- temp
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 341
total_uncompressed_size: 7717527
type: 2
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 208984849
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 208984879
dictionary_page_offset: 208984849
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'1'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'float64'
num_values: 7717453
path_in_schema:
- vdd
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 317
total_uncompressed_size: 7717519
type: 5
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 208985166
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 208985196
dictionary_page_offset: 208985166
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'1'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'float64'
num_values: 7717453
path_in_schema:
- vdd_imc
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 317
total_uncompressed_size: 7717519
type: 5
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 208985483
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 208985513
dictionary_page_offset: 208985483
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'1'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'int64'
num_values: 7717453
path_in_schema:
- vss
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 317
total_uncompressed_size: 7717519
type: 2
offset_index_length: null
offset_index_offset: null
file_offset: null
num_rows: 7717453
ordinal: null
sorting_columns: null
total_byte_size: 365857244
total_compressed_size: null,
columns:
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 208985800
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 210600170
dictionary_page_offset: 208985800
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'1307603'
- key: b'numpy_dtype'
value: b'int32'
- key: b'label_dtype'
value: b'object'
num_values: 7001247
path_in_schema:
- net
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 1891955
total_uncompressed_size: 153502351
type: 6
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 210877755
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 210877755
dictionary_page_offset: null
encoding_stats:
- count: 1
encoding: 0
page_type: 0
encodings:
- 0
index_page_offset: null
key_value_metadata: []
num_values: 7001247
path_in_schema:
- cycle
statistics:
distinct_count: null
max: b'\t\x04\x00\x00\x00\x00\x00\x00'
max_value: null
min: b'\x00\x00\x00\x00\x00\x00\x00\x00'
min_value: null
null_count: 7
total_compressed_size: 5309688
total_uncompressed_size: 56885117
type: 2
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 216187443
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 216187481
dictionary_page_offset: 216187443
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'2'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'object'
num_values: 7001247
path_in_schema:
- edge
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 782009
total_uncompressed_size: 7001322
type: 6
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 216969452
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 216969452
dictionary_page_offset: null
encoding_stats:
- count: 1
encoding: 0
page_type: 0
encodings:
- 0
index_page_offset: null
key_value_metadata: []
num_values: 7001247
path_in_schema:
- time20
statistics:
distinct_count: null
max: b'\x00\xc8\xc7\x83\xd4xE>'
max_value: null
min: b'\x00\x00\x99\xa7\xcf-\xda='
min_value: null
null_count: 2569291
total_compressed_size: 27435118
total_uncompressed_size: 36330845
type: 5
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 244404570
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 244404570
dictionary_page_offset: null
encoding_stats:
- count: 1
encoding: 0
page_type: 0
encodings:
- 0
index_page_offset: null
key_value_metadata: []
num_values: 7001247
path_in_schema:
- time50
statistics:
distinct_count: null
max: b'\x00\xc0 \xf2\xf5\xa3\x07>'
max_value: null
min: b'\x00\x00\x00\x00\x00\x80\x03<'
min_value: null
null_count: 3655088
total_compressed_size: 20922399
total_uncompressed_size: 27644469
type: 5
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 265326969
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 265326969
dictionary_page_offset: null
encoding_stats:
- count: 1
encoding: 0
page_type: 0
encodings:
- 0
index_page_offset: null
key_value_metadata: []
num_values: 7001247
path_in_schema:
- time80
statistics:
distinct_count: null
max: b'\x00\xa0\xd1h\x11\xf4\x07>'
max_value: null
min: b'\x00\x00\xc0`fc\x8a='
min_value: null
null_count: 4509842
total_compressed_size: 15651107
total_uncompressed_size: 20806437
type: 5
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 280978076
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 280978105
dictionary_page_offset: 280978076
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'1'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'object'
num_values: 7001247
path_in_schema:
- global_variation
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 296
total_uncompressed_size: 7001312
type: 6
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 280978372
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 280978410
dictionary_page_offset: 280978372
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'2'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'int64'
num_values: 7001247
path_in_schema:
- sweep_index
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 321
total_uncompressed_size: 7001321
type: 2
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 280978693
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 280978731
dictionary_page_offset: 280978693
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'2'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'int64'
num_values: 7001247
path_in_schema:
- temp
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 321
total_uncompressed_size: 7001321
type: 2
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 280979014
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 280979044
dictionary_page_offset: 280979014
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'1'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'float64'
num_values: 7001247
path_in_schema:
- vdd
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 297
total_uncompressed_size: 7001313
type: 5
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 280979311
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 280979341
dictionary_page_offset: 280979311
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'1'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'float64'
num_values: 7001247
path_in_schema:
- vdd_imc
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 297
total_uncompressed_size: 7001313
type: 5
offset_index_length: null
offset_index_offset: null
- column_index_length: null
column_index_offset: null
crypto_metadata: null
encrypted_column_metadata: null
file_offset: 280979608
file_path: null
meta_data:
bloom_filter_offset: null
codec: 6
data_page_offset: 280979638
dictionary_page_offset: 280979608
encoding_stats:
- count: 1
encoding: 0
page_type: 2
- count: 1
encoding: 8
page_type: 0
encodings:
- 0
- 8
index_page_offset: null
key_value_metadata:
- key: b'num_categories'
value: b'1'
- key: b'numpy_dtype'
value: b'int8'
- key: b'label_dtype'
value: b'int64'
num_values: 7001247
path_in_schema:
- vss
statistics:
distinct_count: null
max: null
max_value: null
min: null
min_value: null
null_count: 0
total_compressed_size: 297
total_uncompressed_size: 7001313
type: 2
offset_index_length: null
offset_index_offset: null
file_offset: null
num_rows: 7001247
ordinal: null
sorting_columns: null
total_byte_size: 344178434
total_compressed_size: null]
pf.pandas_metadata
{'column_indexes': [{'field_name': None,
'metadata': None,
'name': None,
'numpy_type': 'object',
'pandas_type': 'mixed-integer'}],
'columns': [{'field_name': 'net',
'metadata': {'num_categories': 1307603, 'ordered': False},
'name': 'net',
'numpy_type': 'int32',
'pandas_type': 'categorical'},
{'field_name': 'cycle',
'metadata': None,
'name': 'cycle',
'numpy_type': 'int64',
'pandas_type': 'int64'},
{'field_name': 'edge',
'metadata': {'num_categories': 2, 'ordered': False},
'name': 'edge',
'numpy_type': 'int8',
'pandas_type': 'categorical'},
{'field_name': 'time20',
'metadata': None,
'name': 'time20',
'numpy_type': 'float64',
'pandas_type': 'float64'},
{'field_name': 'time50',
'metadata': None,
'name': 'time50',
'numpy_type': 'float64',
'pandas_type': 'float64'},
{'field_name': 'time80',
'metadata': None,
'name': 'time80',
'numpy_type': 'float64',
'pandas_type': 'float64'},
{'field_name': 'global_variation',
'metadata': {'num_categories': 1, 'ordered': False},
'name': 'global_variation',
'numpy_type': 'int8',
'pandas_type': 'categorical'},
{'field_name': 'sweep_index',
'metadata': {'num_categories': 2, 'ordered': False},
'name': 'sweep_index',
'numpy_type': 'int8',
'pandas_type': 'categorical'},
{'field_name': 'temp',
'metadata': {'num_categories': 2, 'ordered': False},
'name': 'temp',
'numpy_type': 'int8',
'pandas_type': 'categorical'},
{'field_name': 'vdd',
'metadata': {'num_categories': 1, 'ordered': False},
'name': 'vdd',
'numpy_type': 'int8',
'pandas_type': 'categorical'},
{'field_name': 'vdd_imc',
'metadata': {'num_categories': 1, 'ordered': False},
'name': 'vdd_imc',
'numpy_type': 'int8',
'pandas_type': 'categorical'},
{'field_name': 'vss',
'metadata': {'num_categories': 1, 'ordered': False},
'name': 'vss',
'numpy_type': 'int8',
'pandas_type': 'categorical'}],
'creator': {'library': 'fastparquet', 'version': '2024.11.0'},
'index_columns': [{'kind': 'range',
'name': None,
'start': 0,
'step': 1,
'stop': 10127116}],
'pandas_version': '2.2.3',
'partition_columns': []}
Anything else we need to know?: I can read this parquet file using pyarrow (pandas.read_parquet), which shows 14 rows where cycle is NaN. Of course, this loses all category information.
Possibly related:
- Optional Ints can cause parser errors #808
- Nullable types for 1 row vs multiple rows #901
- pandas_nulls not working for ParquetFile #948
Environment:
- fastparquet version: 2024.11.0
- Python version: Python 3.12.0
- Operating System: Red Hat Enterprise Linux Workstation release 7.4 (Maipo)
- Install method (conda, pip, source): pip
Metadata
Metadata
Assignees
Labels
No labels