    timedelta,
    timezone,
)
+ from decimal import Decimal
from typing import (
    Any,
    Dict,
@@ -446,6 +447,9 @@ def construct_test_table_primitive_types() -> Tuple[pq.FileMetaData, Union[Table
                    {"id": 10, "name": "strings", "required": False, "type": "string"},
                    {"id": 11, "name": "uuids", "required": False, "type": "uuid"},
                    {"id": 12, "name": "binaries", "required": False, "type": "binary"},
+                     {"id": 13, "name": "decimal8", "required": False, "type": "decimal(5, 2)"},
+                     {"id": 14, "name": "decimal16", "required": False, "type": "decimal(16, 6)"},
+                     {"id": 15, "name": "decimal32", "required": False, "type": "decimal(19, 6)"},
                ],
            },
        ],
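The three precisions appear chosen so that, once the writer stores decimals as integers (see the `store_decimal_as_integer=True` change below), each column lands on a different Parquet physical type and therefore a different bounds-decoding path in the assertions: decimal(5, 2) fits INT32, decimal(16, 6) fits INT64, and decimal(19, 6) exceeds 18 digits and stays a fixed-length byte array. A small illustrative helper (not part of the test or pyiceberg) capturing that mapping:

```python
# Illustrative only: the Parquet physical type expected for a decimal column
# when low-precision decimals are written as integers.
def expected_physical_type(precision: int) -> str:
    if precision <= 9:
        return "INT32"  # covers decimal(5, 2)
    if precision <= 18:
        return "INT64"  # covers decimal(16, 6)
    return "FIXED_LEN_BYTE_ARRAY"  # covers decimal(19, 6)


assert [expected_physical_type(p) for p in (5, 16, 19)] == ["INT32", "INT64", "FIXED_LEN_BYTE_ARRAY"]
```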
@@ -470,6 +474,9 @@ def construct_test_table_primitive_types() -> Tuple[pq.FileMetaData, Union[Table
    strings = ["hello", "world"]
    uuids = [uuid.uuid3(uuid.NAMESPACE_DNS, "foo").bytes, uuid.uuid3(uuid.NAMESPACE_DNS, "bar").bytes]
    binaries = [b"hello", b"world"]
+     decimal8 = pa.array([Decimal("123.45"), Decimal("678.91")], pa.decimal128(8, 2))
+     decimal16 = pa.array([Decimal("12345679.123456"), Decimal("67891234.678912")], pa.decimal128(16, 6))
+     decimal32 = pa.array([Decimal("1234567890123.123456"), Decimal("9876543210703.654321")], pa.decimal128(19, 6))

    table = pa.Table.from_pydict(
        {
@@ -485,14 +492,17 @@ def construct_test_table_primitive_types() -> Tuple[pq.FileMetaData, Union[Table
            "strings": strings,
            "uuids": uuids,
            "binaries": binaries,
+             "decimal8": decimal8,
+             "decimal16": decimal16,
+             "decimal32": decimal32,
        },
        schema=arrow_schema,
    )

    metadata_collector: List[Any] = []

    with pa.BufferOutputStream() as f:
-         with pq.ParquetWriter(f, table.schema, metadata_collector=metadata_collector) as writer:
+         with pq.ParquetWriter(f, table.schema, metadata_collector=metadata_collector, store_decimal_as_integer=True) as writer:
            writer.write_table(table)

    return metadata_collector[0], table_metadata
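`store_decimal_as_integer=True` asks PyArrow's Parquet writer (available in recent pyarrow releases) to store decimals with precision up to 18 using INT32/INT64 physical types instead of fixed-length byte arrays, which is what makes the integer-style bound comparisons in the updated test possible. A minimal sketch of the effect, assuming a pyarrow version that supports the flag; the table and column names are illustrative:

```python
from decimal import Decimal

import pyarrow as pa
import pyarrow.parquet as pq

# Hypothetical one-column table with a decimal(5, 2) field.
table = pa.table({"d": pa.array([Decimal("123.45")], pa.decimal128(5, 2))})

for flag in (False, True):
    buf = pa.BufferOutputStream()
    with pq.ParquetWriter(buf, table.schema, store_decimal_as_integer=flag) as writer:
        writer.write_table(table)
    metadata = pq.ParquetFile(pa.BufferReader(buf.getvalue())).metadata
    # Expected: FIXED_LEN_BYTE_ARRAY with the flag off, INT32 with it on.
    print(flag, metadata.row_group(0).column(0).physical_type)
```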
@@ -510,13 +520,13 @@ def test_metrics_primitive_types() -> None:
    )
    datafile = DataFile(**statistics.to_serialized_dict())

-     assert len(datafile.value_counts) == 12
-     assert len(datafile.null_value_counts) == 12
+     assert len(datafile.value_counts) == 15
+     assert len(datafile.null_value_counts) == 15
    assert len(datafile.nan_value_counts) == 0

    tz = timezone(timedelta(seconds=19800))

-     assert len(datafile.lower_bounds) == 12
+     assert len(datafile.lower_bounds) == 15
    assert datafile.lower_bounds[1] == STRUCT_BOOL.pack(False)
    assert datafile.lower_bounds[2] == STRUCT_INT32.pack(23)
    assert datafile.lower_bounds[3] == STRUCT_INT64.pack(2)
@@ -529,8 +539,11 @@ def test_metrics_primitive_types() -> None:
    assert datafile.lower_bounds[10] == b"he"
    assert datafile.lower_bounds[11] == uuid.uuid3(uuid.NAMESPACE_DNS, "foo").bytes
    assert datafile.lower_bounds[12] == b"he"
+     assert datafile.lower_bounds[13][::-1].ljust(4, b"\x00") == STRUCT_INT32.pack(12345)
+     assert datafile.lower_bounds[14][::-1].ljust(8, b"\x00") == STRUCT_INT64.pack(12345679123456)
+     assert str(int.from_bytes(datafile.lower_bounds[15], byteorder="big", signed=True)).encode("utf-8") == b"1234567890123123456"

-     assert len(datafile.upper_bounds) == 12
+     assert len(datafile.upper_bounds) == 15
    assert datafile.upper_bounds[1] == STRUCT_BOOL.pack(True)
    assert datafile.upper_bounds[2] == STRUCT_INT32.pack(89)
    assert datafile.upper_bounds[3] == STRUCT_INT64.pack(54)
@@ -543,6 +556,9 @@ def test_metrics_primitive_types() -> None:
    assert datafile.upper_bounds[10] == b"wp"
    assert datafile.upper_bounds[11] == uuid.uuid3(uuid.NAMESPACE_DNS, "bar").bytes
    assert datafile.upper_bounds[12] == b"wp"
+     assert datafile.upper_bounds[13][::-1].ljust(4, b"\x00") == STRUCT_INT32.pack(67891)
+     assert datafile.upper_bounds[14][::-1].ljust(8, b"\x00") == STRUCT_INT64.pack(67891234678912)
+     assert str(int.from_bytes(datafile.upper_bounds[15], byteorder="big", signed=True)).encode("utf-8") == b"9876543210703654321"


def construct_test_table_invalid_upper_bound() -> Tuple[pq.FileMetaData, Union[TableMetadataV1, TableMetadataV2]]:
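A note on the new decimal bound assertions above: Iceberg serializes a decimal lower/upper bound as the unscaled value in big-endian two's-complement bytes, while `STRUCT_INT32`/`STRUCT_INT64` pack little-endian, so the test reverses and zero-pads the bound bytes before comparing. A standalone sketch of that equivalence, using values from the test rather than pyiceberg APIs:

```python
import struct
from decimal import Decimal

# A decimal bound is the unscaled value in big-endian two's-complement bytes:
# Decimal("123.45") at scale 2 has unscaled value 12345, i.e. b"\x30\x39".
unscaled = int(Decimal("123.45").scaleb(2))
bound = unscaled.to_bytes(2, byteorder="big", signed=True)

# Reversing to little-endian and zero-padding matches struct's "<i" packing of
# the same integer (safe here because all the test values are positive).
assert bound[::-1].ljust(4, b"\x00") == struct.pack("<i", 12345)

# decimal(19, 6) values can exceed the int64 range, so those bounds are decoded
# with int.from_bytes and compared as digit strings instead of packed structs.
wide = int("9876543210703654321").to_bytes(9, byteorder="big", signed=True)
assert str(int.from_bytes(wide, byteorder="big", signed=True)) == "9876543210703654321"
```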