|
21 | 21 |
|
22 | 22 | @pytest.fixture( |
23 | 23 | params=[ |
24 | | - # pytest.param("fastparquet", marks=FASTPARQUET_MARK), |
| 24 | + pytest.param("fastparquet", marks=FASTPARQUET_MARK), |
25 | 25 | pytest.param("pyarrow", marks=PYARROW_MARK), |
26 | 26 | ] |
27 | 27 | ) |
@@ -145,3 +145,32 @@ def test_open_parquet_file( |
145 | 145 | max_block=max_block, |
146 | 146 | footer_sample_size=footer_sample_size, |
147 | 147 | ) |
| 148 | + |
| 149 | + |
@FASTPARQUET_MARK
def test_with_filter(tmpdir):
    """Check that ``open_parquet_file`` honours ``filters`` with fastparquet.

    A small frame is written with ``row_group_offsets=[0, 3]`` and
    ``stats=True``, then read back twice with the same filter: once straight
    from the path (the expected result) and once through the file object
    returned by ``open_parquet_file``.  The two frames must be equal, and the
    pre-fetched cache must be smaller than the file on disk.
    """
    import pandas as pd

    # One filter definition shared by every read so the expected and actual
    # results cannot drift apart.
    filters = [["b", "==", "b"]]

    df = pd.DataFrame(
        {
            "a": [10, 1, 2, 3, 7, 8, 9],
            "b": ["a", "a", "a", "b", "b", "b", "b"],
        }
    )
    fn = os.path.join(str(tmpdir), "test.parquet")
    # NOTE(review): presumably the offsets split the rows so that the first
    # row group only holds b == "a" and can be skipped by the filter --
    # confirm against fastparquet's row_group_offsets semantics.
    df.to_parquet(fn, engine="fastparquet", row_group_offsets=[0, 3], stats=True)

    expect = pd.read_parquet(fn, engine="fastparquet", filters=filters)

    # Use the file as a context manager so the handle is closed even when an
    # assertion below fails.
    with open_parquet_file(
        fn,
        engine="fastparquet",
        filters=filters,
        max_gap=1,
        max_block=1,
        footer_sample_size=8,
    ) as f:
        # NOTE(review): (0, 4) looks like the byte range of the leading
        # parquet magic bytes -- confirm against the cache implementation.
        assert (0, 4) in f.cache.data
        # The cache holding less than the whole file indicates that only a
        # subset of the bytes was fetched.
        assert f.cache.size < os.path.getsize(fn)

        result = pd.read_parquet(f, engine="fastparquet", filters=filters)

    pd.testing.assert_frame_equal(expect, result)
0 commit comments