@@ -160,7 +160,7 @@ def _create_mixed_dataset(self, num_proc):
160
160
Create the final mixed dataset by loading, sampling, and
161
161
concatenating all datasets in this recipe
162
162
"""
163
- if not self .dataset_added :
163
+ if not self .datasets :
164
164
logger .error ("No dataset added to the recipe" )
165
165
166
166
mixed_ds = self ._load_and_sample_datasets (num_proc )
@@ -726,19 +726,36 @@ def collect(
726
726
sampling_size = self .NUM_SYNTH_SKILLS ,
727
727
)
728
728
729
def _write_mixed_recipe(self, recipe, output_file_recipe):
    """
    Write a recipe created during data mixing without writing the actual
    mixed dataset to disk.

    Args:
        recipe: Object exposing ``save_recipe(path)``.
        output_file_recipe: Recipe file name; it is joined onto
            ``self.output_dir`` to build the destination path.
    """
    full_recipe_path = os.path.join(self.output_dir, output_file_recipe)
    recipe.save_recipe(full_recipe_path)
736
+
729
737
def _gen_mixed_data(self, recipe, output_file_recipe, output_file_data):
    """
    Mix the generated leaf node data into a single dataset and write it to
    disk. The heavy lifting is delegated to the Recipe class.

    The recipe file is always written. The mixed dataset is written only
    when ``recipe.dataset_added`` is truthy.

    Args:
        recipe: Recipe object to persist and, when it has data, to mix.
        output_file_recipe: Recipe file name, relative to ``self.output_dir``.
        output_file_data: Mixed dataset file name, relative to
            ``self.output_dir``.
    """
    # Written unconditionally so the recipe exists even when there is no
    # data to mix.
    self._write_mixed_recipe(recipe, output_file_recipe)

    # NOTE(review): the recipe-side emptiness check in this same change
    # reads `datasets` rather than `dataset_added` -- confirm that
    # `dataset_added` is still maintained on the recipe object.
    if not recipe.dataset_added:
        return

    mixed_data_path = os.path.join(self.output_dir, output_file_data)
    recipe.save_mixed_dataset(mixed_data_path, self.num_procs)
741
748
749
def write_recipes(self):
    """
    Write the knowledge and skills recipes via ``_write_mixed_recipe``,
    knowledge first, without generating the mixed datasets themselves.
    """
    recipes_and_files = (
        (self.knowledge_recipe, self.output_file_knowledge_recipe),
        (self.skills_recipe, self.output_file_skills_recipe),
    )
    for recipe, output_file_recipe in recipes_and_files:
        self._write_mixed_recipe(recipe, output_file_recipe)
758
+
742
759
def generate (self ):
743
760
self ._gen_mixed_data (
744
761
self .knowledge_recipe ,
0 commit comments