Skip to content

TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NAType' #959

@brechtm

Description

@brechtm

Describe the issue: `to_pandas()` raises a `TypeError` when it encounters NA values in an INT64 OPTIONAL column (`cycle` in this parquet file).

Minimal Complete Verifiable Example:

import fastparquet as fp

pf = fp.ParquetFile("ffg_partial.parquet")
pf.to_pandas()
TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NAType'
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[71], [line 1](vscode-notebook-cell:?execution_count=71&line=1)
----> [1](vscode-notebook-cell:?execution_count=71&line=1) df = pf.to_pandas()

File /projects/europa/users/brechtm/clean/.venv/lib/python3.12/site-packages/fastparquet/api.py:790, in ParquetFile.to_pandas(self, columns, categories, filters, index, row_filter, dtypes)
    [786](.../fastparquet/api.py:786)         continue
    [787](.../fastparquet/api.py:787)     parts = {name: (v if name.endswith('-catdef')
    [788](.../fastparquet/api.py:788)                     else v[start:start + thislen])
    [789](.../fastparquet/api.py:789)              for (name, v) in views.items()}
--> [790](.../fastparquet/api.py:790)     self.read_row_group_file(rg, columns, categories, index,
    [791](.../fastparquet/api.py:791)                              assign=parts, partition_meta=self.partition_meta,
    [792](.../fastparquet/api.py:792)                              row_filter=sel, infile=infile)
    [793](.../fastparquet/api.py:793)     start += thislen
    [794](.../fastparquet/api.py:794) return df

File /projects/europa/users/brechtm/clean/.venv/lib/python3.12/site-packages/fastparquet/api.py:388, in ParquetFile.read_row_group_file(self, rg, columns, categories, index, assign, partition_meta, row_filter, infile)
    [385](.../fastparquet/api.py:385)     ret = True
    [386](.../fastparquet/api.py:386) f = infile or self.open(fn, mode='rb')
--> [388](.../fastparquet/api.py:388) core.read_row_group(
    [389](.../fastparquet/api.py:389)     f, rg, columns, categories, self.schema, self.cats,
    [390](.../fastparquet/api.py:390)     selfmade=self.selfmade, index=index,
    [391](.../fastparquet/api.py:391)     assign=assign, scheme=self.file_scheme, partition_meta=partition_meta,
    [392](.../fastparquet/api.py:392)     row_filter=row_filter
    [393](.../fastparquet/api.py:393) )
    [394](.../fastparquet/api.py:394) if ret:
    [395](.../fastparquet/api.py:395)     return df

File /projects/europa/users/brechtm/clean/.venv/lib/python3.12/site-packages/fastparquet/core.py:644, in read_row_group(file, rg, columns, categories, schema_helper, cats, selfmade, index, assign, scheme, partition_meta, row_filter)
    [642](.../fastparquet/core.py:642) if assign is None:
    [643](.../fastparquet/core.py:643)     raise RuntimeError('Going with pre-allocation!')
--> [644](.../fastparquet/core.py:644) read_row_group_arrays(file, rg, columns, categories, schema_helper,
    [645](.../fastparquet/core.py:645)                       cats, selfmade, assign=assign, row_filter=row_filter)
    [647](.../fastparquet/core.py:647) for cat in cats:
    [648](.../fastparquet/core.py:648)     if cat not in assign:
    [649](.../fastparquet/core.py:649)         # do no need to have partition columns in output

File /projects/europa/users/brechtm/clean/.venv/lib/python3.12/site-packages/fastparquet/core.py:615, in read_row_group_arrays(file, rg, columns, categories, schema_helper, cats, selfmade, assign, row_filter)
    [612](.../fastparquet/core.py:612)     continue
    [613](.../fastparquet/core.py:613) remains.discard(name)
--> [615](.../fastparquet/core.py:615) read_col(column, schema_helper, file, use_cat=name+'-catdef' in out,
    [616](.../fastparquet/core.py:616)          selfmade=selfmade, assign=out[name],
    [617](.../fastparquet/core.py:617)          catdef=out.get(name+'-catdef', None),
    [618](.../fastparquet/core.py:618)          row_filter=row_filter)
    [620](.../fastparquet/core.py:620) if _is_map_like(schema_helper, column):
    [621](.../fastparquet/core.py:621)     # TODO: could be done in fast loop in _assemble_objects?
    [622](.../fastparquet/core.py:622)     if name not in maps:

File /projects/europa/users/brechtm/clean/.venv/lib/python3.12/site-packages/fastparquet/core.py:561, in read_col(column, schema_helper, infile, use_cat, selfmade, assign, catdef, row_filter)
    [559](.../fastparquet/core.py:559)     part = part._data
    [560](.../fastparquet/core.py:560) elif part.dtype.kind != "O":
--> [561](.../fastparquet/core.py:561)     part[defi != max_defi] = my_nan
    [562](.../fastparquet/core.py:562) if d and not use_cat:
    [563](.../fastparquet/core.py:563)     part[defi == max_defi] = dic[val]

TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NAType'

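I cannot share the parquet file, so hopefully the following information is sufficient to debug the issue. Note that `cycle` is reported as plain `int64` in `pf.dtypes`, even though its `null_count` is non-zero (7) in two of the row groups.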

> pf.selfmade
True
> pf.dtypes
OrderedDict([('net', 'category'),
             ('cycle', dtype('int64')),
             ('edge', 'category'),
             ('time20', dtype('float64')),
             ('time50', dtype('float64')),
             ('time80', dtype('float64')),
             ('global_variation', 'category'),
             ('sweep_index', 'category'),
             ('temp', 'category'),
             ('vdd', 'category'),
             ('vdd_imc', 'category'),
             ('vss', 'category')])
> print(pf.schema)
- schema: 
| - net: BYTE_ARRAY, UTF8, OPTIONAL
| - cycle: INT64, OPTIONAL
| - edge: BYTE_ARRAY, UTF8, OPTIONAL
| - time20: DOUBLE, OPTIONAL
| - time50: DOUBLE, OPTIONAL
| - time80: DOUBLE, OPTIONAL
| - global_variation: BYTE_ARRAY, UTF8, OPTIONAL
| - sweep_index: INT64, OPTIONAL
| - temp: INT64, OPTIONAL
| - vdd: DOUBLE, OPTIONAL
| - vdd_imc: DOUBLE, OPTIONAL
  - vss: INT64, OPTIONAL
> pf.statistics
{'min': {'net': [None],
         'cycle': [2, 0, 2, 0],
         'edge': [None],
         'time20': [1.4773426876688636e-10,
                    1.0559080883552939e-10,
                    7.139149389929003e-11,
                    9.52384645968221e-11],
         'time50': [1.5455092029759065e-10,
                    1.3213713977167085e-19,
                    1.672420481250059e-10,
                    1.3213713977167085e-19],
         'time80': [1.473934018947706e-10,
                    3.0000002099352238e-12,
                    1.3538265149883599e-10,
                    3.0000002099352238e-12],
         'global_variation': [None],
         'sweep_index': [None, None, None, None],
         'temp': [None, None, None, None],
         'vdd': [None, None, None, None],
         'vdd_imc': [None, None, None, None],
         'vss': [None, None, None, None]},
 'max': {'net': [None],
         'cycle': [1024, 1033, 1024, 1033],
         'edge': [None],
         'time20': [6.524647908841792e-10,
                    9.998369878836728e-09,
                    6.761692109951692e-10,
                    9.99867577012857e-09],
         'time50': [6.316129033313601e-10,
                    6.376947296995981e-10,
                    6.755620766233663e-10,
                    6.880282769349157e-10],
         'time80': [6.419314096761683e-10,
                    9.11409428754411e-09,
                    6.881378599573002e-10,
                    6.971354202572327e-10],
         'global_variation': [None],
         'sweep_index': [None, None, None, None],
         'temp': [None, None, None, None],
         'vdd': [None, None, None, None],
         'vdd_imc': [None, None, None, None],
         'vss': [None, None, None, None]},
 'null_count': {'net': [0, 0, 0, 0],
                'cycle': [0, 7, 0, 7],
                'edge': [0, 0, 0, 0],
                'time20': [3540142, 773119, 2661380, 2569291],
                'time50': [5093064, 1212040, 4006276, 3655088],
                'time80': [6684602, 1666286, 5130506, 4509842],
                'global_variation': [0, 0, 0, 0],
                'sweep_index': [0, 0, 0, 0],
                'temp': [0, 0, 0, 0],
                'vdd': [0, 0, 0, 0],
                'vdd_imc': [0, 0, 0, 0],
                'vss': [0, 0, 0, 0]},
 'distinct_count': {'net': [None, None, None, None],
                    'cycle': [None, None, None, None],
                    'edge': [None, None, None, None],
                    'time20': [None, None, None, None],
                    'time50': [None, None, None, None],
                    'time80': [None, None, None, None],
                    'global_variation': [None, None, None, None],
                    'sweep_index': [None, None, None, None],
                    'temp': [None, None, None, None],
                    'vdd': [None, None, None, None],
                    'vdd_imc': [None, None, None, None],
                    'vss': [None, None, None, None]}}
> pf.row_groups
[columns:
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 4
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 1614374
     dictionary_page_offset: 4
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'1307603'
     - key: b'numpy_dtype'
       value: b'int32'
     - key: b'label_dtype'
       value: b'object'
     num_values: 10127116
     path_in_schema:
     - net
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 2104220
     total_uncompressed_size: 166005828
     type: 6
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 2104224
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 2104224
     dictionary_page_offset: null
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 0
     encodings:
     - 0
     index_page_offset: null
     key_value_metadata: []
     num_values: 10127116
     path_in_schema:
     - cycle
     statistics:
       distinct_count: null
       max: b'\x00\x04\x00\x00\x00\x00\x00\x00'
       max_value: null
       min: b'\x02\x00\x00\x00\x00\x00\x00\x00'
       min_value: null
       null_count: 0
     total_compressed_size: 7843627
     total_uncompressed_size: 81016971
     type: 2
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 9947851
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 9947889
     dictionary_page_offset: 9947851
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'2'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'object'
     num_values: 10127116
     path_in_schema:
     - edge
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 1052716
     total_uncompressed_size: 10127193
     type: 6
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 11000567
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 11000567
     dictionary_page_offset: null
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 0
     encodings:
     - 0
     index_page_offset: null
     key_value_metadata: []
     num_values: 10127116
     path_in_schema:
     - time20
     statistics:
       distinct_count: null
       max: b'\x00`4\x18$k\x06>'
       max_value: null
       min: b'\x00\x80\xdb\xfe\xefM\xe4='
       min_value: null
       null_count: 3540142
     total_compressed_size: 40359107
     total_uncompressed_size: 53961724
     type: 5
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 51359674
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 51359674
     dictionary_page_offset: null
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 0
     encodings:
     - 0
     index_page_offset: null
     key_value_metadata: []
     num_values: 10127116
     path_in_schema:
     - time50
     statistics:
       distinct_count: null
       max: b'\x00@k\xd1\xb9\xb3\x05>'
       max_value: null
       min: b'\x00\xc0"\xef\xc6=\xe5='
       min_value: null
       null_count: 5093064
     total_compressed_size: 31180098
     total_uncompressed_size: 41538348
     type: 5
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 82539772
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 82539772
     dictionary_page_offset: null
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 0
     encodings:
     - 0
     index_page_offset: null
     key_value_metadata: []
     num_values: 10127116
     path_in_schema:
     - time80
     statistics:
       distinct_count: null
       max: b'\x00`Y\x07}\x0e\x06>'
       max_value: null
       min: b'\x00\x00]\xbd\xf1A\xe4='
       min_value: null
       null_count: 6684602
     total_compressed_size: 21626096
     total_uncompressed_size: 28806044
     type: 5
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 104165868
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 104165897
     dictionary_page_offset: 104165868
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'1'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'object'
     num_values: 10127116
     path_in_schema:
     - global_variation
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 393
     total_uncompressed_size: 10127182
     type: 6
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 104166261
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 104166299
     dictionary_page_offset: 104166261
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'2'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'int64'
     num_values: 10127116
     path_in_schema:
     - sweep_index
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 402
     total_uncompressed_size: 10127191
     type: 2
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 104166663
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 104166701
     dictionary_page_offset: 104166663
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'2'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'int64'
     num_values: 10127116
     path_in_schema:
     - temp
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 402
     total_uncompressed_size: 10127191
     type: 2
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 104167065
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 104167095
     dictionary_page_offset: 104167065
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'1'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'float64'
     num_values: 10127116
     path_in_schema:
     - vdd
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 394
     total_uncompressed_size: 10127183
     type: 5
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 104167459
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 104167489
     dictionary_page_offset: 104167459
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'1'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'float64'
     num_values: 10127116
     path_in_schema:
     - vdd_imc
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 394
     total_uncompressed_size: 10127183
     type: 5
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 104167853
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 104167883
     dictionary_page_offset: 104167853
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'1'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'int64'
     num_values: 10127116
     path_in_schema:
     - vss
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 394
     total_uncompressed_size: 10127183
     type: 2
   offset_index_length: null
   offset_index_offset: null
 file_offset: null
 num_rows: 10127116
 ordinal: null
 sorting_columns: null
 total_byte_size: 442219221
 total_compressed_size: null,
 columns:
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 104168247
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 105782617
     dictionary_page_offset: 104168247
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'1307603'
     - key: b'numpy_dtype'
       value: b'int32'
     - key: b'label_dtype'
       value: b'object'
     num_values: 2412984
     path_in_schema:
     - net
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 1730307
     total_uncompressed_size: 135149299
     type: 6
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 105898554
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 105898554
     dictionary_page_offset: null
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 0
     encodings:
     - 0
     index_page_offset: null
     key_value_metadata: []
     num_values: 2412984
     path_in_schema:
     - cycle
     statistics:
       distinct_count: null
       max: b'\t\x04\x00\x00\x00\x00\x00\x00'
       max_value: null
       min: b'\x00\x00\x00\x00\x00\x00\x00\x00'
       min_value: null
       null_count: 7
     total_compressed_size: 1892296
     total_uncompressed_size: 19605481
     type: 2
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 107790850
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 107790888
     dictionary_page_offset: 107790850
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'2'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'object'
     num_values: 2412984
     path_in_schema:
     - edge
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 252846
     total_uncompressed_size: 2413059
     type: 6
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 108043696
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 108043696
     dictionary_page_offset: null
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 0
     encodings:
     - 0
     index_page_offset: null
     key_value_metadata: []
     num_values: 2412984
     path_in_schema:
     - time20
     statistics:
       distinct_count: null
       max: b'\x00\x98\xe2v\xa9xE>'
       max_value: null
       min: b'\x00\x00\xc8\xe8J\x06\xdd='
       min_value: null
       null_count: 773119
     total_compressed_size: 10136923
     total_uncompressed_size: 13420585
     type: 5
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 118180619
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 118180619
     dictionary_page_offset: null
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 0
     encodings:
     - 0
     index_page_offset: null
     key_value_metadata: []
     num_values: 2412984
     path_in_schema:
     - time50
     statistics:
       distinct_count: null
       max: b'\x00`\x9f\xdf8\xe9\x05>'
       max_value: null
       min: b'\x00\x00\x00\x00\x00\x80\x03<'
       min_value: null
       null_count: 1212040
     total_compressed_size: 7514835
     total_uncompressed_size: 9909217
     type: 5
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 125695454
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 125695454
     dictionary_page_offset: null
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 0
     encodings:
     - 0
     index_page_offset: null
     key_value_metadata: []
     num_values: 2412984
     path_in_schema:
     - time80
     statistics:
       distinct_count: null
       max: b'\x00\xfe\x18\xbd\x86\x92C>'
       max_value: null
       min: b'\x00\x00\xc0`fc\x8a='
       min_value: null
       null_count: 1666286
     total_compressed_size: 4730693
     total_uncompressed_size: 6275249
     type: 5
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 130426147
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 130426176
     dictionary_page_offset: 130426147
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'1'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'object'
     num_values: 2412984
     path_in_schema:
     - global_variation
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 156
     total_uncompressed_size: 2413049
     type: 6
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 130426303
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 130426341
     dictionary_page_offset: 130426303
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'2'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'int64'
     num_values: 2412984
     path_in_schema:
     - sweep_index
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 165
     total_uncompressed_size: 2413058
     type: 2
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 130426468
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 130426506
     dictionary_page_offset: 130426468
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'2'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'int64'
     num_values: 2412984
     path_in_schema:
     - temp
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 165
     total_uncompressed_size: 2413058
     type: 2
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 130426633
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 130426663
     dictionary_page_offset: 130426633
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'1'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'float64'
     num_values: 2412984
     path_in_schema:
     - vdd
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 157
     total_uncompressed_size: 2413050
     type: 5
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 130426790
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 130426820
     dictionary_page_offset: 130426790
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'1'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'float64'
     num_values: 2412984
     path_in_schema:
     - vdd_imc
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 157
     total_uncompressed_size: 2413050
     type: 5
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 130426947
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 130426977
     dictionary_page_offset: 130426947
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'1'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'int64'
     num_values: 2412984
     path_in_schema:
     - vss
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 157
     total_uncompressed_size: 2413050
     type: 2
   offset_index_length: null
   offset_index_offset: null
 file_offset: null
 num_rows: 2412984
 ordinal: null
 sorting_columns: null
 total_byte_size: 201251205
 total_compressed_size: null,
 columns:
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 130427104
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 132041474
     dictionary_page_offset: 130427104
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'1307603'
     - key: b'numpy_dtype'
       value: b'int32'
     - key: b'label_dtype'
       value: b'object'
     num_values: 7717453
     path_in_schema:
     - net
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 1937450
     total_uncompressed_size: 156367175
     type: 6
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 132364554
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 132364554
     dictionary_page_offset: null
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 0
     encodings:
     - 0
     index_page_offset: null
     key_value_metadata: []
     num_values: 7717453
     path_in_schema:
     - cycle
     statistics:
       distinct_count: null
       max: b'\x00\x04\x00\x00\x00\x00\x00\x00'
       max_value: null
       min: b'\x02\x00\x00\x00\x00\x00\x00\x00'
       min_value: null
       null_count: 0
     total_compressed_size: 5820515
     total_uncompressed_size: 61739667
     type: 2
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 138185069
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 138185107
     dictionary_page_offset: 138185069
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'2'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'object'
     num_values: 7717453
     path_in_schema:
     - edge
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 790437
     total_uncompressed_size: 7717528
     type: 6
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 138975506
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 138975506
     dictionary_page_offset: null
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 0
     encodings:
     - 0
     index_page_offset: null
     key_value_metadata: []
     num_values: 7717453
     path_in_schema:
     - time20
     statistics:
       distinct_count: null
       max: b'\x00\x00\x0e\xb4\xa5;\x07>'
       max_value: null
       min: b'\x00\x00@\xd2\xba\x9f\xd3='
       min_value: null
       null_count: 2661380
     total_compressed_size: 30853331
     total_uncompressed_size: 41413307
     type: 5
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 169828837
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 169828837
     dictionary_page_offset: null
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 0
     encodings:
     - 0
     index_page_offset: null
     key_value_metadata: []
     num_values: 7717453
     path_in_schema:
     - time50
     statistics:
       distinct_count: null
       max: b'\x00\x00\xeb\x8eN6\x07>'
       max_value: null
       min: b'\x00\x00\x84sN\xfc\xe6='
       min_value: null
       null_count: 4006276
     total_compressed_size: 22925077
     total_uncompressed_size: 30654139
     type: 5
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 192753914
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 192753914
     dictionary_page_offset: null
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 0
     encodings:
     - 0
     index_page_offset: null
     key_value_metadata: []
     num_values: 7717453
     path_in_schema:
     - time80
     statistics:
       distinct_count: null
       max: b'\x00\x80p\xb4\xec\xa4\x07>'
       max_value: null
       min: b'\x00\x04\x87\x84Z\x9b\xe2='
       min_value: null
       null_count: 5130506
     total_compressed_size: 16229937
     total_uncompressed_size: 21660299
     type: 5
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 208983851
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 208983880
     dictionary_page_offset: 208983851
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'1'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'object'
     num_values: 7717453
     path_in_schema:
     - global_variation
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 316
     total_uncompressed_size: 7717518
     type: 6
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 208984167
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 208984205
     dictionary_page_offset: 208984167
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'2'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'int64'
     num_values: 7717453
     path_in_schema:
     - sweep_index
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 341
     total_uncompressed_size: 7717527
     type: 2
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 208984508
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 208984546
     dictionary_page_offset: 208984508
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'2'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'int64'
     num_values: 7717453
     path_in_schema:
     - temp
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 341
     total_uncompressed_size: 7717527
     type: 2
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 208984849
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 208984879
     dictionary_page_offset: 208984849
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'1'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'float64'
     num_values: 7717453
     path_in_schema:
     - vdd
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 317
     total_uncompressed_size: 7717519
     type: 5
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 208985166
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 208985196
     dictionary_page_offset: 208985166
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'1'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'float64'
     num_values: 7717453
     path_in_schema:
     - vdd_imc
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 317
     total_uncompressed_size: 7717519
     type: 5
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 208985483
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 208985513
     dictionary_page_offset: 208985483
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'1'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'int64'
     num_values: 7717453
     path_in_schema:
     - vss
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 317
     total_uncompressed_size: 7717519
     type: 2
   offset_index_length: null
   offset_index_offset: null
 file_offset: null
 num_rows: 7717453
 ordinal: null
 sorting_columns: null
 total_byte_size: 365857244
 total_compressed_size: null,
 columns:
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 208985800
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 210600170
     dictionary_page_offset: 208985800
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'1307603'
     - key: b'numpy_dtype'
       value: b'int32'
     - key: b'label_dtype'
       value: b'object'
     num_values: 7001247
     path_in_schema:
     - net
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 1891955
     total_uncompressed_size: 153502351
     type: 6
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 210877755
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 210877755
     dictionary_page_offset: null
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 0
     encodings:
     - 0
     index_page_offset: null
     key_value_metadata: []
     num_values: 7001247
     path_in_schema:
     - cycle
     statistics:
       distinct_count: null
       max: b'\t\x04\x00\x00\x00\x00\x00\x00'
       max_value: null
       min: b'\x00\x00\x00\x00\x00\x00\x00\x00'
       min_value: null
       null_count: 7
     total_compressed_size: 5309688
     total_uncompressed_size: 56885117
     type: 2
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 216187443
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 216187481
     dictionary_page_offset: 216187443
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'2'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'object'
     num_values: 7001247
     path_in_schema:
     - edge
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 782009
     total_uncompressed_size: 7001322
     type: 6
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 216969452
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 216969452
     dictionary_page_offset: null
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 0
     encodings:
     - 0
     index_page_offset: null
     key_value_metadata: []
     num_values: 7001247
     path_in_schema:
     - time20
     statistics:
       distinct_count: null
       max: b'\x00\xc8\xc7\x83\xd4xE>'
       max_value: null
       min: b'\x00\x00\x99\xa7\xcf-\xda='
       min_value: null
       null_count: 2569291
     total_compressed_size: 27435118
     total_uncompressed_size: 36330845
     type: 5
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 244404570
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 244404570
     dictionary_page_offset: null
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 0
     encodings:
     - 0
     index_page_offset: null
     key_value_metadata: []
     num_values: 7001247
     path_in_schema:
     - time50
     statistics:
       distinct_count: null
       max: b'\x00\xc0 \xf2\xf5\xa3\x07>'
       max_value: null
       min: b'\x00\x00\x00\x00\x00\x80\x03<'
       min_value: null
       null_count: 3655088
     total_compressed_size: 20922399
     total_uncompressed_size: 27644469
     type: 5
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 265326969
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 265326969
     dictionary_page_offset: null
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 0
     encodings:
     - 0
     index_page_offset: null
     key_value_metadata: []
     num_values: 7001247
     path_in_schema:
     - time80
     statistics:
       distinct_count: null
       max: b'\x00\xa0\xd1h\x11\xf4\x07>'
       max_value: null
       min: b'\x00\x00\xc0`fc\x8a='
       min_value: null
       null_count: 4509842
     total_compressed_size: 15651107
     total_uncompressed_size: 20806437
     type: 5
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 280978076
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 280978105
     dictionary_page_offset: 280978076
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'1'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'object'
     num_values: 7001247
     path_in_schema:
     - global_variation
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 296
     total_uncompressed_size: 7001312
     type: 6
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 280978372
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 280978410
     dictionary_page_offset: 280978372
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'2'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'int64'
     num_values: 7001247
     path_in_schema:
     - sweep_index
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 321
     total_uncompressed_size: 7001321
     type: 2
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 280978693
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 280978731
     dictionary_page_offset: 280978693
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'2'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'int64'
     num_values: 7001247
     path_in_schema:
     - temp
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 321
     total_uncompressed_size: 7001321
     type: 2
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 280979014
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 280979044
     dictionary_page_offset: 280979014
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'1'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'float64'
     num_values: 7001247
     path_in_schema:
     - vdd
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 297
     total_uncompressed_size: 7001313
     type: 5
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 280979311
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 280979341
     dictionary_page_offset: 280979311
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'1'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'float64'
     num_values: 7001247
     path_in_schema:
     - vdd_imc
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 297
     total_uncompressed_size: 7001313
     type: 5
   offset_index_length: null
   offset_index_offset: null
 - column_index_length: null
   column_index_offset: null
   crypto_metadata: null
   encrypted_column_metadata: null
   file_offset: 280979608
   file_path: null
   meta_data:
     bloom_filter_offset: null
     codec: 6
     data_page_offset: 280979638
     dictionary_page_offset: 280979608
     encoding_stats:
     - count: 1
       encoding: 0
       page_type: 2
     - count: 1
       encoding: 8
       page_type: 0
     encodings:
     - 0
     - 8
     index_page_offset: null
     key_value_metadata:
     - key: b'num_categories'
       value: b'1'
     - key: b'numpy_dtype'
       value: b'int8'
     - key: b'label_dtype'
       value: b'int64'
     num_values: 7001247
     path_in_schema:
     - vss
     statistics:
       distinct_count: null
       max: null
       max_value: null
       min: null
       min_value: null
       null_count: 0
     total_compressed_size: 297
     total_uncompressed_size: 7001313
     type: 2
   offset_index_length: null
   offset_index_offset: null
 file_offset: null
 num_rows: 7001247
 ordinal: null
 sorting_columns: null
 total_byte_size: 344178434
 total_compressed_size: null]
pf.pandas_metadata
{'column_indexes': [{'field_name': None,
                     'metadata': None,
                     'name': None,
                     'numpy_type': 'object',
                     'pandas_type': 'mixed-integer'}],
 'columns': [{'field_name': 'net',
              'metadata': {'num_categories': 1307603, 'ordered': False},
              'name': 'net',
              'numpy_type': 'int32',
              'pandas_type': 'categorical'},
             {'field_name': 'cycle',
              'metadata': None,
              'name': 'cycle',
              'numpy_type': 'int64',
              'pandas_type': 'int64'},
             {'field_name': 'edge',
              'metadata': {'num_categories': 2, 'ordered': False},
              'name': 'edge',
              'numpy_type': 'int8',
              'pandas_type': 'categorical'},
             {'field_name': 'time20',
              'metadata': None,
              'name': 'time20',
              'numpy_type': 'float64',
              'pandas_type': 'float64'},
             {'field_name': 'time50',
              'metadata': None,
              'name': 'time50',
              'numpy_type': 'float64',
              'pandas_type': 'float64'},
             {'field_name': 'time80',
              'metadata': None,
              'name': 'time80',
              'numpy_type': 'float64',
              'pandas_type': 'float64'},
             {'field_name': 'global_variation',
              'metadata': {'num_categories': 1, 'ordered': False},
              'name': 'global_variation',
              'numpy_type': 'int8',
              'pandas_type': 'categorical'},
             {'field_name': 'sweep_index',
              'metadata': {'num_categories': 2, 'ordered': False},
              'name': 'sweep_index',
              'numpy_type': 'int8',
              'pandas_type': 'categorical'},
             {'field_name': 'temp',
              'metadata': {'num_categories': 2, 'ordered': False},
              'name': 'temp',
              'numpy_type': 'int8',
              'pandas_type': 'categorical'},
             {'field_name': 'vdd',
              'metadata': {'num_categories': 1, 'ordered': False},
              'name': 'vdd',
              'numpy_type': 'int8',
              'pandas_type': 'categorical'},
             {'field_name': 'vdd_imc',
              'metadata': {'num_categories': 1, 'ordered': False},
              'name': 'vdd_imc',
              'numpy_type': 'int8',
              'pandas_type': 'categorical'},
             {'field_name': 'vss',
              'metadata': {'num_categories': 1, 'ordered': False},
              'name': 'vss',
              'numpy_type': 'int8',
              'pandas_type': 'categorical'}],
 'creator': {'library': 'fastparquet', 'version': '2024.11.0'},
 'index_columns': [{'kind': 'range',
                    'name': None,
                    'start': 0,
                    'step': 1,
                    'stop': 10127116}],
 'pandas_version': '2.2.3',
 'partition_columns': []}

Anything else we need to know?: I can read this parquet file using pyarrow (pandas.read_parquet), which shows 14 rows where cycle is NaN. Of course, this loses all category information.

Possibly related:

Environment:

  • Dask version: fastparquet 2024.11.0
  • Python version: Python 3.12.0
  • Operating System: Red Hat Enterprise Linux Workstation release 7.4 (Maipo)
  • Install method (conda, pip, source): pip

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions