@@ -21,14 +21,24 @@ use std::sync::Arc;
2121use  std:: sync:: atomic:: AtomicU64 ; 
2222
2323use  crate :: Result ; 
24- use  crate :: spec:: { DataFileFormat ,  TableMetadata } ; 
24+ use  crate :: spec:: { DataFileFormat ,  PartitionKey ,   TableMetadata } ; 
2525
2626/// `LocationGenerator` used to generate the location of data file. 
2727pub  trait  LocationGenerator :  Clone  + Send  + ' static  { 
28-     /// Generate an absolute path for the given file name. 
29- /// e.g. 
30- /// For file name "part-00000.parquet", the generated location maybe "/table/data/part-00000.parquet" 
31- fn  generate_location ( & self ,  file_name :  & str )  -> String ; 
28+     /// Generate an absolute path for the given file name that includes the partition path. 
29+ /// 
30+ /// # Arguments 
31+ /// 
32+ /// * `partition_key` - The partition key of the file. If None, generate a non-partitioned path. 
33+ /// * `file_name` - The name of the file 
34+ /// 
35+ /// # Returns 
36+ /// 
37+ /// An absolute path that includes the partition path, e.g., 
38+ /// "/table/data/id=1/name=alice/part-00000.parquet" 
39+ /// or non-partitioned path: 
40+ /// "/table/data/part-00000.parquet" 
41+ fn  generate_location ( & self ,  partition_key :  Option < & PartitionKey > ,  file_name :  & str )  -> String ; 
3242} 
3343
3444const  WRITE_DATA_LOCATION :  & str  = "write.data.path" ; 
@@ -39,29 +49,38 @@ const DEFAULT_DATA_DIR: &str = "/data";
/// `DefaultLocationGenerator` is the default generator for data file locations.
///
/// It stores a single base data location; generated file locations are built
/// beneath it.
pub struct DefaultLocationGenerator {
    /// Base location under which data files are placed.
    data_location: String,
}
4454
4555impl  DefaultLocationGenerator  { 
4656    /// Create a new `DefaultLocationGenerator`. 
4757pub  fn  new ( table_metadata :  TableMetadata )  -> Result < Self >  { 
4858        let  table_location = table_metadata. location ( ) ; 
4959        let  prop = table_metadata. properties ( ) ; 
50-         let  data_location  = prop
60+         let  configured_data_location  = prop
5161            . get ( WRITE_DATA_LOCATION ) 
5262            . or ( prop. get ( WRITE_FOLDER_STORAGE_LOCATION ) ) ; 
53-         let  dir_path  = if  let  Some ( data_location)  = data_location  { 
63+         let  data_location  = if  let  Some ( data_location)  = configured_data_location  { 
5464            data_location. clone ( ) 
5565        }  else  { 
5666            format ! ( "{}{}" ,  table_location,  DEFAULT_DATA_DIR ) 
5767        } ; 
58-         Ok ( Self  {  dir_path  } ) 
68+         Ok ( Self  {  data_location  } ) 
5969    } 
6070} 
6171
6272impl  LocationGenerator  for  DefaultLocationGenerator  { 
63-     fn  generate_location ( & self ,  file_name :  & str )  -> String  { 
64-         format ! ( "{}/{}" ,  self . dir_path,  file_name) 
73+     fn  generate_location ( & self ,  partition_key :  Option < & PartitionKey > ,  file_name :  & str )  -> String  { 
74+         if  PartitionKey :: is_effectively_none ( partition_key)  { 
75+             format ! ( "{}/{}" ,  self . data_location,  file_name) 
76+         }  else  { 
77+             format ! ( 
78+                 "{}/{}/{}" , 
79+                 self . data_location, 
80+                 partition_key. unwrap( ) . to_path( ) , 
81+                 file_name
82+             ) 
83+         } 
6584    } 
6685} 
6786
@@ -115,11 +134,15 @@ impl FileNameGenerator for DefaultFileNameGenerator {
115134#[ cfg( test) ]  
116135pub ( crate )  mod  test { 
117136    use  std:: collections:: HashMap ; 
137+     use  std:: sync:: Arc ; 
118138
119139    use  uuid:: Uuid ; 
120140
121141    use  super :: LocationGenerator ; 
122-     use  crate :: spec:: { FormatVersion ,  PartitionSpec ,  StructType ,  TableMetadata } ; 
142+     use  crate :: spec:: { 
143+         FormatVersion ,  Literal ,  NestedField ,  PartitionKey ,  PartitionSpec ,  PrimitiveType ,  Schema , 
144+         Struct ,  StructType ,  TableMetadata ,  Transform ,  Type , 
145+     } ; 
123146    use  crate :: writer:: file_writer:: location_generator:: { 
124147        FileNameGenerator ,  WRITE_DATA_LOCATION ,  WRITE_FOLDER_STORAGE_LOCATION , 
125148    } ; 
@@ -136,8 +159,17 @@ pub(crate) mod test {
136159    } 
137160
138161    impl  LocationGenerator  for  MockLocationGenerator  { 
139-         fn  generate_location ( & self ,  file_name :  & str )  -> String  { 
140-             format ! ( "{}/{}" ,  self . root,  file_name) 
162+         fn  generate_location ( & self ,  partition :  Option < & PartitionKey > ,  file_name :  & str )  -> String  { 
163+             if  PartitionKey :: is_effectively_none ( partition)  { 
164+                 format ! ( "{}/{}" ,  self . root,  file_name) 
165+             }  else  { 
166+                 format ! ( 
167+                     "{}/{}/{}" , 
168+                     self . root, 
169+                     partition. unwrap( ) . to_path( ) , 
170+                     file_name
171+                 ) 
172+             } 
141173        } 
142174    } 
143175
@@ -169,7 +201,7 @@ pub(crate) mod test {
169201            encryption_keys :  HashMap :: new ( ) , 
170202        } ; 
171203
172-         let  file_name_genertaor  = super :: DefaultFileNameGenerator :: new ( 
204+         let  file_name_generator  = super :: DefaultFileNameGenerator :: new ( 
173205            "part" . to_string ( ) , 
174206            Some ( "test" . to_string ( ) ) , 
175207            crate :: spec:: DataFileFormat :: Parquet , 
@@ -179,7 +211,7 @@ pub(crate) mod test {
179211        let  location_generator =
180212            super :: DefaultLocationGenerator :: new ( table_metadata. clone ( ) ) . unwrap ( ) ; 
181213        let  location =
182-             location_generator. generate_location ( & file_name_genertaor . generate_file_name ( ) ) ; 
214+             location_generator. generate_location ( None ,   & file_name_generator . generate_file_name ( ) ) ; 
183215        assert_eq ! ( location,  "s3://data.db/table/data/part-00000-test.parquet" ) ; 
184216
185217        // test custom data location 
@@ -190,7 +222,7 @@ pub(crate) mod test {
190222        let  location_generator =
191223            super :: DefaultLocationGenerator :: new ( table_metadata. clone ( ) ) . unwrap ( ) ; 
192224        let  location =
193-             location_generator. generate_location ( & file_name_genertaor . generate_file_name ( ) ) ; 
225+             location_generator. generate_location ( None ,   & file_name_generator . generate_file_name ( ) ) ; 
194226        assert_eq ! ( 
195227            location, 
196228            "s3://data.db/table/data_1/part-00001-test.parquet" 
@@ -203,7 +235,7 @@ pub(crate) mod test {
203235        let  location_generator =
204236            super :: DefaultLocationGenerator :: new ( table_metadata. clone ( ) ) . unwrap ( ) ; 
205237        let  location =
206-             location_generator. generate_location ( & file_name_genertaor . generate_file_name ( ) ) ; 
238+             location_generator. generate_location ( None ,   & file_name_generator . generate_file_name ( ) ) ; 
207239        assert_eq ! ( 
208240            location, 
209241            "s3://data.db/table/data_2/part-00002-test.parquet" 
@@ -217,7 +249,79 @@ pub(crate) mod test {
217249        let  location_generator =
218250            super :: DefaultLocationGenerator :: new ( table_metadata. clone ( ) ) . unwrap ( ) ; 
219251        let  location =
220-             location_generator. generate_location ( & file_name_genertaor . generate_file_name ( ) ) ; 
252+             location_generator. generate_location ( None ,   & file_name_generator . generate_file_name ( ) ) ; 
221253        assert_eq ! ( location,  "s3://data.db/data_3/part-00003-test.parquet" ) ; 
222254    } 
255+ 
256+     #[ test]  
257+     fn  test_location_generate_with_partition ( )  { 
258+         // Create a schema with two fields: id (int) and name (string) 
259+         let  schema = Arc :: new ( 
260+             Schema :: builder ( ) 
261+                 . with_schema_id ( 1 ) 
262+                 . with_fields ( vec ! [ 
263+                     NestedField :: required( 1 ,  "id" ,  Type :: Primitive ( PrimitiveType :: Int ) ) . into( ) , 
264+                     NestedField :: required( 2 ,  "name" ,  Type :: Primitive ( PrimitiveType :: String ) ) . into( ) , 
265+                 ] ) 
266+                 . build ( ) 
267+                 . unwrap ( ) , 
268+         ) ; 
269+ 
270+         // Create a partition spec with both fields 
271+         let  partition_spec = PartitionSpec :: builder ( schema. clone ( ) ) 
272+             . add_partition_field ( "id" ,  "id" ,  Transform :: Identity ) 
273+             . unwrap ( ) 
274+             . add_partition_field ( "name" ,  "name" ,  Transform :: Identity ) 
275+             . unwrap ( ) 
276+             . build ( ) 
277+             . unwrap ( ) ; 
278+ 
279+         // Create partition data with values 
280+         let  partition_data =
281+             Struct :: from_iter ( [ Some ( Literal :: int ( 42 ) ) ,  Some ( Literal :: string ( "alice" ) ) ] ) ; 
282+ 
283+         // Create a partition key 
284+         let  partition_key = PartitionKey :: new ( partition_spec,  schema,  partition_data) ; 
285+ 
286+         // Test with MockLocationGenerator 
287+         let  mock_location_gen = MockLocationGenerator :: new ( "/base/path" . to_string ( ) ) ; 
288+         let  file_name = "data-00000.parquet" ; 
289+         let  location = mock_location_gen. generate_location ( Some ( & partition_key) ,  file_name) ; 
290+         assert_eq ! ( location,  "/base/path/id=42/name=alice/data-00000.parquet" ) ; 
291+ 
292+         // Create a table metadata for DefaultLocationGenerator 
293+         let  table_metadata = TableMetadata  { 
294+             format_version :  FormatVersion :: V2 , 
295+             table_uuid :  Uuid :: parse_str ( "fb072c92-a02b-11e9-ae9c-1bb7bc9eca94" ) . unwrap ( ) , 
296+             location :  "s3://data.db/table" . to_string ( ) , 
297+             last_updated_ms :  1515100955770 , 
298+             last_column_id :  2 , 
299+             schemas :  HashMap :: new ( ) , 
300+             current_schema_id :  1 , 
301+             partition_specs :  HashMap :: new ( ) , 
302+             default_spec :  PartitionSpec :: unpartition_spec ( ) . into ( ) , 
303+             default_partition_type :  StructType :: new ( vec ! [ ] ) , 
304+             last_partition_id :  1000 , 
305+             default_sort_order_id :  0 , 
306+             sort_orders :  HashMap :: from_iter ( vec ! [ ] ) , 
307+             snapshots :  HashMap :: default ( ) , 
308+             current_snapshot_id :  None , 
309+             last_sequence_number :  1 , 
310+             properties :  HashMap :: new ( ) , 
311+             snapshot_log :  Vec :: new ( ) , 
312+             metadata_log :  vec ! [ ] , 
313+             refs :  HashMap :: new ( ) , 
314+             statistics :  HashMap :: new ( ) , 
315+             partition_statistics :  HashMap :: new ( ) , 
316+             encryption_keys :  HashMap :: new ( ) , 
317+         } ; 
318+ 
319+         // Test with DefaultLocationGenerator 
320+         let  default_location_gen = super :: DefaultLocationGenerator :: new ( table_metadata) . unwrap ( ) ; 
321+         let  location = default_location_gen. generate_location ( Some ( & partition_key) ,  file_name) ; 
322+         assert_eq ! ( 
323+             location, 
324+             "s3://data.db/table/data/id=42/name=alice/data-00000.parquet" 
325+         ) ; 
326+     } 
223327} 
0 commit comments