1
+ // Copyright 2024 Zilliz
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+
15
+ #include " ../../packed/packed_test_base.h"
16
+ #include " format/parquet/file_reader.h"
17
+ namespace milvus_storage {
18
+
19
+ class FileReaderTest : public PackedTestBase {};
20
+
21
+ TEST_F (FileReaderTest, FileRecordBatchReader) {
22
+ int batch_size = 100 ;
23
+
24
+ PackedRecordBatchWriter writer (writer_memory_, schema_, *fs_, file_path_, storage_config_, props_);
25
+ for (int i = 0 ; i < batch_size; ++i) {
26
+ EXPECT_TRUE (writer.Write (record_batch_).ok ());
27
+ }
28
+ EXPECT_TRUE (writer.Close ().ok ());
29
+
30
+ std::vector<std::shared_ptr<arrow::Field>> fields = {
31
+ arrow::field (" int32" , arrow::int32 ()),
32
+ arrow::field (" int64" , arrow::int64 ()),
33
+ arrow::field (" str" , arrow::utf8 ()),
34
+ };
35
+ auto schema = arrow::schema (fields);
36
+
37
+ // exeed row group range, should throw out_of_range
38
+ std::string path = file_path_ + " /0" ;
39
+ EXPECT_THROW (FileRecordBatchReader fr (*fs_, path, schema, reader_memory_, 100 ), std::out_of_range);
40
+
41
+ // file not exist, should throw runtime_error
42
+ path = file_path_ + " /file_not_exist" ;
43
+ EXPECT_THROW (FileRecordBatchReader fr (*fs_, path, schema, reader_memory_), std::runtime_error);
44
+
45
+ // read all row groups
46
+ path = file_path_ + " /0" ;
47
+ FileRecordBatchReader fr (*fs_, path, schema, reader_memory_);
48
+ ASSERT_AND_ARROW_ASSIGN (auto fr_table, fr.ToTable ());
49
+ ASSERT_STATUS_OK (fr.Close ());
50
+
51
+ std::set<int > needed_columns = {0 , 1 , 2 };
52
+ std::vector<ColumnOffset> column_offsets = {
53
+ ColumnOffset (0 , 0 ),
54
+ ColumnOffset (0 , 1 ),
55
+ ColumnOffset (0 , 2 ),
56
+ };
57
+ PackedRecordBatchReader pr (*fs_, {path}, schema, column_offsets, needed_columns, reader_memory_);
58
+ ASSERT_AND_ARROW_ASSIGN (auto pr_table, pr.ToTable ());
59
+ ASSERT_STATUS_OK (pr.Close ());
60
+ ASSERT_EQ (fr_table->num_rows (), pr_table->num_rows ());
61
+
62
+ // read row group 1
63
+ path = file_path_ + " /0" ;
64
+ FileRecordBatchReader rgr (*fs_, path, schema, reader_memory_, 1 , 1 );
65
+ ASSERT_AND_ARROW_ASSIGN (auto rg_table, rgr.ToTable ());
66
+ ASSERT_STATUS_OK (rgr.Close ());
67
+ ASSERT_GT (fr_table->num_rows (), rg_table->num_rows ());
68
+ }
69
+
70
+ } // namespace milvus_storage
0 commit comments