@@ -59,6 +59,8 @@ pub struct TableScanBuilder<'a> {
5959 concurrency_limit_manifest_files : usize ,
6060 row_group_filtering_enabled : bool ,
6161 row_selection_enabled : bool ,
62+
63+ limit : Option < usize > ,
6264}
6365
6466impl < ' a > TableScanBuilder < ' a > {
@@ -77,9 +79,16 @@ impl<'a> TableScanBuilder<'a> {
7779 concurrency_limit_manifest_files : num_cpus,
7880 row_group_filtering_enabled : true ,
7981 row_selection_enabled : false ,
82+ limit : None ,
8083 }
8184 }
8285
86+ /// Sets the maximum number of records to return
87+ pub fn with_limit ( mut self , limit : Option < usize > ) -> Self {
88+ self . limit = limit;
89+ self
90+ }
91+
8392 /// Sets the desired size of batches in the response
8493 /// to something other than the default
8594 pub fn with_batch_size ( mut self , batch_size : Option < usize > ) -> Self {
@@ -281,6 +290,7 @@ impl<'a> TableScanBuilder<'a> {
281290 snapshot_schema : schema,
282291 case_sensitive : self . case_sensitive ,
283292 predicate : self . filter . map ( Arc :: new) ,
293+ limit : self . limit ,
284294 snapshot_bound_predicate : snapshot_bound_predicate. map ( Arc :: new) ,
285295 object_cache : self . table . object_cache ( ) ,
286296 field_ids : Arc :: new ( field_ids) ,
@@ -1406,6 +1416,130 @@ pub mod tests {
14061416 assert_eq ! ( int64_arr. value( 0 ) , 2 ) ;
14071417 }
14081418
1419+ #[ tokio:: test]
1420+ async fn test_limit ( ) {
1421+ let mut fixture = TableTestFixture :: new ( ) ;
1422+ fixture. setup_manifest_files ( ) . await ;
1423+
1424+ let mut builder = fixture. table . scan ( ) ;
1425+ builder = builder. with_limit ( Some ( 1 ) ) ;
1426+ let table_scan = builder. build ( ) . unwrap ( ) ;
1427+
1428+ let batch_stream = table_scan. to_arrow ( ) . await . unwrap ( ) ;
1429+
1430+ let batches: Vec < _ > = batch_stream. try_collect ( ) . await . unwrap ( ) ;
1431+
1432+ assert_eq ! ( batches. len( ) , 2 ) ;
1433+ assert_eq ! ( batches[ 0 ] . num_rows( ) , 1 ) ;
1434+ assert_eq ! ( batches[ 1 ] . num_rows( ) , 1 ) ;
1435+
1436+ let col = batches[ 0 ] . column_by_name ( "x" ) . unwrap ( ) ;
1437+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1438+ assert_eq ! ( int64_arr. value( 0 ) , 1 ) ;
1439+
1440+ let col = batches[ 0 ] . column_by_name ( "y" ) . unwrap ( ) ;
1441+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1442+ assert_eq ! ( int64_arr. value( 0 ) , 2 ) ;
1443+
1444+ let col = batches[ 0 ] . column_by_name ( "x" ) . unwrap ( ) ;
1445+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1446+ assert_eq ! ( int64_arr. value( 0 ) , 1 ) ;
1447+
1448+ let col = batches[ 0 ] . column_by_name ( "y" ) . unwrap ( ) ;
1449+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1450+ assert_eq ! ( int64_arr. value( 0 ) , 2 ) ;
1451+ }
1452+
1453+ #[ tokio:: test]
1454+ async fn test_limit_with_predicate ( ) {
1455+ let mut fixture = TableTestFixture :: new ( ) ;
1456+ fixture. setup_manifest_files ( ) . await ;
1457+
1458+ // Filter: y > 3
1459+ let mut builder = fixture. table . scan ( ) ;
1460+ let predicate = Reference :: new ( "y" ) . greater_than ( Datum :: long ( 3 ) ) ;
1461+ builder = builder. with_filter ( predicate) . with_limit ( Some ( 1 ) ) ;
1462+ let table_scan = builder. build ( ) . unwrap ( ) ;
1463+
1464+ let batch_stream = table_scan. to_arrow ( ) . await . unwrap ( ) ;
1465+
1466+ let batches: Vec < _ > = batch_stream. try_collect ( ) . await . unwrap ( ) ;
1467+
1468+ assert_eq ! ( batches. len( ) , 2 ) ;
1469+ assert_eq ! ( batches[ 0 ] . num_rows( ) , 1 ) ;
1470+ assert_eq ! ( batches[ 1 ] . num_rows( ) , 1 ) ;
1471+
1472+ let col = batches[ 0 ] . column_by_name ( "x" ) . unwrap ( ) ;
1473+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1474+ assert_eq ! ( int64_arr. value( 0 ) , 1 ) ;
1475+
1476+ let col = batches[ 0 ] . column_by_name ( "y" ) . unwrap ( ) ;
1477+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1478+ assert_eq ! ( int64_arr. value( 0 ) , 4 ) ;
1479+ }
1480+
1481+ #[ tokio:: test]
1482+ async fn test_limit_with_predicate_and_row_selection ( ) {
1483+ let mut fixture = TableTestFixture :: new ( ) ;
1484+ fixture. setup_manifest_files ( ) . await ;
1485+
1486+ // Filter: y > 3
1487+ let mut builder = fixture. table . scan ( ) ;
1488+ let predicate = Reference :: new ( "y" ) . greater_than ( Datum :: long ( 3 ) ) ;
1489+ builder = builder
1490+ . with_filter ( predicate)
1491+ . with_limit ( Some ( 1 ) )
1492+ . with_row_selection_enabled ( true ) ;
1493+ let table_scan = builder. build ( ) . unwrap ( ) ;
1494+
1495+ let batch_stream = table_scan. to_arrow ( ) . await . unwrap ( ) ;
1496+
1497+ let batches: Vec < _ > = batch_stream. try_collect ( ) . await . unwrap ( ) ;
1498+
1499+ assert_eq ! ( batches. len( ) , 2 ) ;
1500+ assert_eq ! ( batches[ 0 ] . num_rows( ) , 1 ) ;
1501+ assert_eq ! ( batches[ 1 ] . num_rows( ) , 1 ) ;
1502+
1503+ let col = batches[ 0 ] . column_by_name ( "x" ) . unwrap ( ) ;
1504+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1505+ assert_eq ! ( int64_arr. value( 0 ) , 1 ) ;
1506+
1507+ let col = batches[ 0 ] . column_by_name ( "y" ) . unwrap ( ) ;
1508+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1509+ assert_eq ! ( int64_arr. value( 0 ) , 4 ) ;
1510+ }
1511+
1512+ #[ tokio:: test]
1513+ async fn test_limit_higher_than_total_rows ( ) {
1514+ let mut fixture = TableTestFixture :: new ( ) ;
1515+ fixture. setup_manifest_files ( ) . await ;
1516+
1517+ // Filter: y > 3
1518+ let mut builder = fixture. table . scan ( ) ;
1519+ let predicate = Reference :: new ( "y" ) . greater_than ( Datum :: long ( 3 ) ) ;
1520+ builder = builder
1521+ . with_filter ( predicate)
1522+ . with_limit ( Some ( 100_000_000 ) )
1523+ . with_row_selection_enabled ( true ) ;
1524+ let table_scan = builder. build ( ) . unwrap ( ) ;
1525+
1526+ let batch_stream = table_scan. to_arrow ( ) . await . unwrap ( ) ;
1527+
1528+ let batches: Vec < _ > = batch_stream. try_collect ( ) . await . unwrap ( ) ;
1529+
1530+ assert_eq ! ( batches. len( ) , 2 ) ;
1531+ assert_eq ! ( batches[ 0 ] . num_rows( ) , 312 ) ;
1532+ assert_eq ! ( batches[ 1 ] . num_rows( ) , 312 ) ;
1533+
1534+ let col = batches[ 0 ] . column_by_name ( "x" ) . unwrap ( ) ;
1535+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1536+ assert_eq ! ( int64_arr. value( 0 ) , 1 ) ;
1537+
1538+ let col = batches[ 0 ] . column_by_name ( "y" ) . unwrap ( ) ;
1539+ let int64_arr = col. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1540+ assert_eq ! ( int64_arr. value( 0 ) , 4 ) ;
1541+ }
1542+
14091543 #[ tokio:: test]
14101544 async fn test_filter_on_arrow_gt_eq ( ) {
14111545 let mut fixture = TableTestFixture :: new ( ) ;
@@ -1780,6 +1914,7 @@ pub mod tests {
17801914 record_count : Some ( 100 ) ,
17811915 data_file_format : DataFileFormat :: Parquet ,
17821916 deletes : vec ! [ ] ,
1917+ limit : None ,
17831918 } ;
17841919 test_fn ( task) ;
17851920
@@ -1794,6 +1929,7 @@ pub mod tests {
17941929 record_count : None ,
17951930 data_file_format : DataFileFormat :: Avro ,
17961931 deletes : vec ! [ ] ,
1932+ limit : None ,
17971933 } ;
17981934 test_fn ( task) ;
17991935 }
0 commit comments