diff --git a/src/unitxt/schema.py b/src/unitxt/schema.py
index 5926b120cc..ac9cb621be 100644
--- a/src/unitxt/schema.py
+++ b/src/unitxt/schema.py
@@ -171,6 +171,12 @@ def process(
         task_data["metadata"]["template"] = self.artifact_to_jsonable(
             instance["recipe_metadata"]["template"]
         )
+        task_data["metadata"]["format"] = self.artifact_to_jsonable(
+            instance["recipe_metadata"]["format"]
+        )
+        task_data["metadata"]["system_prompt"] = self.artifact_to_jsonable(
+            instance["recipe_metadata"]["system_prompt"]
+        )
         if "criteria" in task_data and isinstance(task_data["criteria"], Artifact):
             task_data["criteria"] = self.artifact_to_jsonable(task_data["criteria"])
         if constants.demos_field in instance:
diff --git a/src/unitxt/standard.py b/src/unitxt/standard.py
index fd1eb39a9f..2307663759 100644
--- a/src/unitxt/standard.py
+++ b/src/unitxt/standard.py
@@ -7,10 +7,10 @@
 from .augmentors import Augmentor, NullAugmentor
 from .card import TaskCard
 from .collections_operators import GetLength
-from .dataclass import Field, InternalField, NonPositionalField, OptionalField
+from .dataclass import InternalField, NonPositionalField, OptionalField
 from .deprecation_utils import deprecation
 from .error_utils import UnitxtError
-from .formats import Format, SystemFormat
+from .formats import Format
 from .generator_utils import ReusableGenerator
 from .logging_utils import get_logger
 from .operator import (
@@ -25,7 +25,7 @@
 from .settings_utils import get_constants, get_settings
 from .splitters import ConstantSizeSample, RandomSizeSample, Sampler
 from .stream import MultiStream
-from .system_prompts import EmptySystemPrompt, SystemPrompt
+from .system_prompts import SystemPrompt
 from .task import Task
 from .templates import (
     ApplyRandomTemplate,
@@ -248,7 +248,7 @@ class DatasetRecipe(SourceSequentialOperator):
     card: TaskCard = None
     task: Task = None
     template: Union[Template, List[Template], TemplatesList] = None
-    system_prompt: SystemPrompt = Field(default_factory=EmptySystemPrompt)
+    system_prompt: SystemPrompt = None
     format: Format = None
     serializer: Union[SingleTypeSerializer, List[SingleTypeSerializer]] = None
 
@@ -490,7 +490,10 @@ def reset_pipeline(self):
             if settings.default_format is not None:
                 self.format, _ = fetch_artifact(settings.default_format)
             else:
-                self.format = SystemFormat()
+                self.format, _ = fetch_artifact("formats.empty")
+
+        if self.system_prompt is None:
+            self.system_prompt, _ = fetch_artifact("system_prompts.empty")
 
         if self.card and self.card.preprocess_steps is None:
             self.card.preprocess_steps = []
diff --git a/tests/library/test_api.py b/tests/library/test_api.py
index 1af86397f8..67cb5021ee 100644
--- a/tests/library/test_api.py
+++ b/tests/library/test_api.py
@@ -41,7 +41,7 @@ def test_load_dataset(self):
             "target": "5.0",
             "references": ["5.0"],
             "source": "Given this sentence: 'A plane is taking off.', on a scale of 1.0 to 5.0, what is the similarity to this text 'An air plane is taking off.'?\n",
-            "task_data": '{"text1": "A plane is taking off.", "text2": "An air plane is taking off.", "attribute_name": "similarity", "min_value": 1.0, "max_value": 5.0, "attribute_value": 5.0, "metadata": {"data_classification_policy": ["public"], "template": "templates.regression.two_texts.simple", "demos_pool_size": 0, "num_demos": 0}}',
+            "task_data": '{"text1": "A plane is taking off.", "text2": "An air plane is taking off.", "attribute_name": "similarity", "min_value": 1.0, "max_value": 5.0, "attribute_value": 5.0, "metadata": {"data_classification_policy": ["public"], "template": "templates.regression.two_texts.simple", "demos_pool_size": 0, "num_demos": 0,  "format": "formats.empty", "system_prompt": "system_prompts.empty"}}',
             "groups": [],
             "media": {"audios": [], "images": []},
             "subset": [],
@@ -72,7 +72,7 @@ def test_load_dataset_with_multi_num_demos(self):
                 "processors.take_first_non_empty_line",
                 "processors.cast_to_float_return_zero_if_failed",
             ],
-            "task_data": '{"text1": "A man is spreading shreded cheese on a pizza.", "text2": "A man is spreading shredded cheese on an uncooked pizza.", "attribute_name": "similarity", "min_value": 1.0, "max_value": 5.0, "metadata": {"data_classification_policy": ["public"], "demos_pool_size": 2, "num_demos": 0, "template": "templates.regression.two_texts.simple"}, "attribute_value": 3.799999952316284, "demos": []}',
+            "task_data": '{"text1": "A man is spreading shreded cheese on a pizza.", "text2": "A man is spreading shredded cheese on an uncooked pizza.", "attribute_name": "similarity", "min_value": 1.0, "max_value": 5.0, "metadata": {"data_classification_policy": ["public"], "demos_pool_size": 2, "num_demos": 0, "template": "templates.regression.two_texts.simple",  "format": "formats.empty", "system_prompt": "system_prompts.empty"}, "attribute_value": 3.799999952316284, "demos": []}',
             "data_classification_policy": ["public"],
         }
         self.assertEqual(len(dataset["train"]), 5)
@@ -106,7 +106,7 @@ def test_load_dataset_with_mixed_args(self):
                 "processors.take_first_non_empty_line",
                 "processors.cast_to_float_return_zero_if_failed",
             ],
-            "task_data": '{"text1": "A man is spreading shreded cheese on a pizza.", "text2": "A man is spreading shredded cheese on an uncooked pizza.", "attribute_name": "similarity", "min_value": 1.0, "max_value": 5.0, "metadata": {"data_classification_policy": ["public"], "demos_pool_size": 2, "num_demos": 0, "template": "templates.regression.two_texts.simple"}, "attribute_value": 3.799999952316284, "demos": []}',
+            "task_data": '{"text1": "A man is spreading shreded cheese on a pizza.", "text2": "A man is spreading shredded cheese on an uncooked pizza.", "attribute_name": "similarity", "min_value": 1.0, "max_value": 5.0, "metadata": {"data_classification_policy": ["public"], "demos_pool_size": 2, "num_demos": 0, "template": "templates.regression.two_texts.simple", "format": "formats.empty", "system_prompt": "system_prompts.empty"}, "attribute_value": 3.799999952316284, "demos": []}',
             "data_classification_policy": ["public"],
         }
         self.assertEqual(len(dataset["train"]), 5)
@@ -127,7 +127,7 @@ def test_load_dataset_with_multi_templates(self):
             "target": "5.0",
             "references": ["5.0"],
             "source": "text1: A plane is taking off., text2: An air plane is taking off., attribute_name: similarity, min_value: 1.0, max_value: 5.0\n",
-            "task_data": '{"text1": "A plane is taking off.", "text2": "An air plane is taking off.", "attribute_name": "similarity", "min_value": 1.0, "max_value": 5.0, "attribute_value": 5.0, "metadata": {"data_classification_policy": ["public"], "template": "templates.key_val", "demos_pool_size": 0, "num_demos": 0}}',
+            "task_data": '{"text1": "A plane is taking off.", "text2": "An air plane is taking off.", "attribute_name": "similarity", "min_value": 1.0, "max_value": 5.0, "attribute_value": 5.0, "metadata": {"data_classification_policy": ["public"], "template": "templates.key_val", "demos_pool_size": 0, "num_demos": 0,  "format": "formats.empty", "system_prompt": "system_prompts.empty"}}',
             "groups": [],
             "media": {"audios": [], "images": []},
             "subset": [],
@@ -158,7 +158,7 @@ def test_load_dataset_with_benchmark(self):
                 "processors.lower_case_till_punc",
             ],
             "source": "Classify the grammatical acceptability of the following text to one of these options: unacceptable, acceptable.\ntext: The sailors rode the breeze clear of the rocks.\nThe grammatical acceptability is ",
-            "task_data": '{"text": "The sailors rode the breeze clear of the rocks.", "text_type": "text", "classes": ["unacceptable", "acceptable"], "type_of_class": "grammatical acceptability", "label": "acceptable", "metadata": {"data_classification_policy": ["public"], "template": "templates.classification.multi_class.instruction", "demos_pool_size": 0, "num_demos": 0}}',
+            "task_data": '{"text": "The sailors rode the breeze clear of the rocks.", "text_type": "text", "classes": ["unacceptable", "acceptable"], "type_of_class": "grammatical acceptability", "label": "acceptable", "metadata": {"data_classification_policy": ["public"], "template": "templates.classification.multi_class.instruction", "demos_pool_size": 0, "num_demos": 0, "format": "formats.empty", "system_prompt": "system_prompts.empty"}}',
             "groups": [],
             "media": {"audios": [], "images": []},
             "subset": ["cola"],
@@ -173,7 +173,7 @@ def test_load_dataset_with_benchmark(self):
                 "processors.lower_case_till_punc",
             ],
             "source": "Given a premise and hypothesis classify the entailment of the hypothesis to one of entailment, not entailment.\npremise: The drain is clogged with hair. It has to be cleaned.\nhypothesis: The hair has to be cleaned.\nThe entailment class is ",
-            "task_data": '{"text_a": "The drain is clogged with hair. It has to be cleaned.", "text_a_type": "premise", "text_b": "The hair has to be cleaned.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "label": "entailment", "metadata": {"data_classification_policy": ["public"], "template": "templates.classification.multi_class.relation.default", "demos_pool_size": 0, "num_demos": 0}}',
+            "task_data": '{"text_a": "The drain is clogged with hair. It has to be cleaned.", "text_a_type": "premise", "text_b": "The hair has to be cleaned.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "label": "entailment", "metadata": {"data_classification_policy": ["public"], "template": "templates.classification.multi_class.relation.default", "demos_pool_size": 0, "num_demos": 0, "format": "formats.empty", "system_prompt": "system_prompts.empty"}}',
             "groups": [],
             "media": {"audios": [], "images": []},
             "subset": ["wnli"],
@@ -203,7 +203,7 @@ def test_load_dataset_with_benchmark_mixed_args(self):
                 "processors.lower_case_till_punc",
             ],
             "source": "Classify the grammatical acceptability of the following text to one of these options: unacceptable, acceptable.\ntext: The sailors rode the breeze clear of the rocks.\nThe grammatical acceptability is ",
-            "task_data": '{"text": "The sailors rode the breeze clear of the rocks.", "text_type": "text", "classes": ["unacceptable", "acceptable"], "type_of_class": "grammatical acceptability", "label": "acceptable", "metadata": {"data_classification_policy": ["public"], "template": "templates.classification.multi_class.instruction", "demos_pool_size": 0, "num_demos": 0}}',
+            "task_data": '{"text": "The sailors rode the breeze clear of the rocks.", "text_type": "text", "classes": ["unacceptable", "acceptable"], "type_of_class": "grammatical acceptability", "label": "acceptable", "metadata": {"data_classification_policy": ["public"], "template": "templates.classification.multi_class.instruction", "demos_pool_size": 0, "num_demos": 0, "format": "formats.empty", "system_prompt": "system_prompts.empty"}}',
             "groups": [],
             "media": {"audios": [], "images": []},
             "subset": ["cola"],
@@ -218,7 +218,7 @@ def test_load_dataset_with_benchmark_mixed_args(self):
                 "processors.lower_case_till_punc",
             ],
             "source": "Given a premise and hypothesis classify the entailment of the hypothesis to one of entailment, not entailment.\npremise: The drain is clogged with hair. It has to be cleaned.\nhypothesis: The hair has to be cleaned.\nThe entailment class is ",
-            "task_data": '{"text_a": "The drain is clogged with hair. It has to be cleaned.", "text_a_type": "premise", "text_b": "The hair has to be cleaned.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "label": "entailment", "metadata": {"data_classification_policy": ["public"], "template": "templates.classification.multi_class.relation.default", "demos_pool_size": 0, "num_demos": 0}}',
+            "task_data": '{"text_a": "The drain is clogged with hair. It has to be cleaned.", "text_a_type": "premise", "text_b": "The hair has to be cleaned.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "label": "entailment", "metadata": {"data_classification_policy": ["public"], "template": "templates.classification.multi_class.relation.default", "demos_pool_size": 0, "num_demos": 0, "format": "formats.empty", "system_prompt": "system_prompts.empty"}}',
             "groups": [],
             "media": {"audios": [], "images": []},
             "subset": ["wnli"],
@@ -291,6 +291,8 @@ def test_evaluate(self):
                     "template": "templates.regression.two_texts.simple",
                     "demos_pool_size": 0,
                     "num_demos": 0,
+                    "format": "formats.empty",
+                    "system_prompt": "system_prompts.empty",
                 },
                 "source": "Given this sentence: 'A plane is taking off.', on a scale of 1.0 to 5.0, what is the similarity to this text 'An air plane is taking off.'?\n",
             },
@@ -370,6 +372,8 @@ def test_evaluate_with_groups(self):
                     "template": "templates.regression.two_texts.simple",
                     "demos_pool_size": 0,
                     "num_demos": 0,
+                    "format": "formats.empty",
+                    "system_prompt": "system_prompts.empty",
                 },
                 "source": "Given this sentence: 'A plane is taking off.', on a scale of 1.0 to 5.0, what is the similarity to this text 'An air plane is taking off.'?\n",
             },
@@ -450,7 +454,7 @@ def test_produce_with_recipe(self):
                 "processors.lower_case_till_punc",
             ],
             "source": "Given a premise and hypothesis classify the entailment of the hypothesis to one of entailment, not entailment.\npremise: When Tatyana reached the cabin, her mother was sleeping. She was careful not to disturb her, undressing and climbing back into her berth.\nhypothesis: mother was careful not to disturb her, undressing and climbing back into her berth.\nThe entailment class is entailment\n\npremise: Steve follows Fred's example in everything. He influences him hugely.\nhypothesis: Steve influences him hugely.\nThe entailment class is entailment\n\npremise: It works perfectly\nhypothesis: It works!\nThe entailment class is ",
-            "task_data": '{"text_a": "It works perfectly", "text_a_type": "premise", "text_b": "It works!", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": [], "num_demos": 2, "demos_pool_size": 5, "template": "templates.classification.multi_class.relation.default"}, "demos": [{"text_a": "When Tatyana reached the cabin, her mother was sleeping. She was careful not to disturb her, undressing and climbing back into her berth.", "text_a_type": "premise", "text_b": "mother was careful not to disturb her, undressing and climbing back into her berth.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "entailment"}, {"text_a": "Steve follows Fred\'s example in everything. He influences him hugely.", "text_a_type": "premise", "text_b": "Steve influences him hugely.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "entailment"}]}',
+            "task_data": '{"text_a": "It works perfectly", "text_a_type": "premise", "text_b": "It works!", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": [], "num_demos": 2, "demos_pool_size": 5, "template": "templates.classification.multi_class.relation.default", "format": "formats.empty", "system_prompt": "system_prompts.empty"}, "demos": [{"text_a": "When Tatyana reached the cabin, her mother was sleeping. She was careful not to disturb her, undressing and climbing back into her berth.", "text_a_type": "premise", "text_b": "mother was careful not to disturb her, undressing and climbing back into her berth.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "entailment"}, {"text_a": "Steve follows Fred\'s example in everything. He influences him hugely.", "text_a_type": "premise", "text_b": "Steve influences him hugely.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "entailment"}]}',
             "groups": [],
             "subset": [],
             "media": {"images": [], "audios": []},
@@ -479,7 +483,7 @@ def test_produce_with_task(self):
                 "processors.lower_case_till_punc",
             ],
             "source": "Given a premise and hypothesis classify the entailment of the hypothesis to one of entailment, not entailment.\npremise: It works perfectly\nhypothesis: It works!\nThe entailment class is ",
-            "task_data": '{"text_a": "It works perfectly", "text_a_type": "premise", "text_b": "It works!", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": [], "num_demos": 0, "demos_pool_size": 0, "template": "templates.classification.multi_class.relation.default"}}',
+            "task_data": '{"text_a": "It works perfectly", "text_a_type": "premise", "text_b": "It works!", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": [], "num_demos": 0, "demos_pool_size": 0, "template": "templates.classification.multi_class.relation.default", "format": "formats.empty", "system_prompt": "system_prompts.empty"}}',
             "groups": [],
             "subset": [],
             "media": {"images": [], "audios": []},
@@ -510,7 +514,7 @@ def test_produce_with_recipe_with_list_of_instances(self):
                 "processors.lower_case_till_punc",
             ],
             "source": "Given a premise and hypothesis classify the entailment of the hypothesis to one of entailment, not entailment.\npremise: When Tatyana reached the cabin, her mother was sleeping. She was careful not to disturb her, undressing and climbing back into her berth.\nhypothesis: mother was careful not to disturb her, undressing and climbing back into her berth.\nThe entailment class is entailment\n\npremise: Steve follows Fred's example in everything. He influences him hugely.\nhypothesis: Steve influences him hugely.\nThe entailment class is entailment\n\npremise: It works perfectly\nhypothesis: It works!\nThe entailment class is ",
-            "task_data": '{"text_a": "It works perfectly", "text_a_type": "premise", "text_b": "It works!", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": [], "num_demos": 2, "demos_pool_size": 5, "template": "templates.classification.multi_class.relation.default"}, "demos": [{"text_a": "When Tatyana reached the cabin, her mother was sleeping. She was careful not to disturb her, undressing and climbing back into her berth.", "text_a_type": "premise", "text_b": "mother was careful not to disturb her, undressing and climbing back into her berth.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "entailment"}, {"text_a": "Steve follows Fred\'s example in everything. He influences him hugely.", "text_a_type": "premise", "text_b": "Steve influences him hugely.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "entailment"}]}',
+            "task_data": '{"text_a": "It works perfectly", "text_a_type": "premise", "text_b": "It works!", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": [], "num_demos": 2, "demos_pool_size": 5, "template": "templates.classification.multi_class.relation.default", "format": "formats.empty", "system_prompt": "system_prompts.empty"}, "demos": [{"text_a": "When Tatyana reached the cabin, her mother was sleeping. She was careful not to disturb her, undressing and climbing back into her berth.", "text_a_type": "premise", "text_b": "mother was careful not to disturb her, undressing and climbing back into her berth.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "entailment"}, {"text_a": "Steve follows Fred\'s example in everything. He influences him hugely.", "text_a_type": "premise", "text_b": "Steve influences him hugely.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "entailment"}]}',
             "groups": [],
             "subset": [],
             "media": {"images": [], "audios": []},
diff --git a/tests/library/test_benchmark.py b/tests/library/test_benchmark.py
index baece69d2d..fce73d3860 100644
--- a/tests/library/test_benchmark.py
+++ b/tests/library/test_benchmark.py
@@ -38,7 +38,7 @@ def test_benchmark(self):
                     "target": "acceptable",
                     "references": ["acceptable"],
                     "source": "Classify the grammatical acceptability of the following text to one of these options: unacceptable, acceptable.\n\nUser:text: The sailors rode the breeze clear of the rocks.\nAgent:The grammatical acceptability is ",
-                    "task_data": '{"text": "The sailors rode the breeze clear of the rocks.", "text_type": "text", "classes": ["unacceptable", "acceptable"], "type_of_class": "grammatical acceptability", "metadata": {"data_classification_policy": ["public"], "num_demos": 0, "demos_pool_size": 0, "template": "templates.classification.multi_class.instruction"}, "label": "acceptable"}',
+                    "task_data": '{"text": "The sailors rode the breeze clear of the rocks.", "text_type": "text", "classes": ["unacceptable", "acceptable"], "type_of_class": "grammatical acceptability", "metadata": {"data_classification_policy": ["public"], "num_demos": 0, "demos_pool_size": 0, "template": "templates.classification.multi_class.instruction", "format": "formats.user_agent", "system_prompt": "system_prompts.empty"}, "label": "acceptable"}',
                     "groups": [],
                     "subset": ["cola"],
                 },
@@ -53,7 +53,7 @@ def test_benchmark(self):
                     "target": "acceptable",
                     "references": ["acceptable"],
                     "source": "Classify the grammatical acceptability of the following text to one of these options: unacceptable, acceptable.\n\nUser:text: The weights made the rope stretch over the pulley.\nAgent:The grammatical acceptability is ",
-                    "task_data": '{"text": "The weights made the rope stretch over the pulley.", "text_type": "text", "classes": ["unacceptable", "acceptable"], "type_of_class": "grammatical acceptability", "metadata": {"data_classification_policy": ["public"], "num_demos": 0, "demos_pool_size": 0, "template": "templates.classification.multi_class.instruction"}, "label": "acceptable"}',
+                    "task_data": '{"text": "The weights made the rope stretch over the pulley.", "text_type": "text", "classes": ["unacceptable", "acceptable"], "type_of_class": "grammatical acceptability", "metadata": {"data_classification_policy": ["public"], "num_demos": 0, "demos_pool_size": 0, "template": "templates.classification.multi_class.instruction", "format": "formats.user_agent", "system_prompt": "system_prompts.empty"}, "label": "acceptable"}',
                     "groups": [],
                     "subset": ["cola"],
                 },
@@ -72,7 +72,7 @@ def test_benchmark(self):
                     "target": "entailment",
                     "references": ["entailment"],
                     "source": "Given a premise and hypothesis classify the entailment of the hypothesis to one of entailment, not entailment.\n\nUser:premise: The drain is clogged with hair. It has to be cleaned.\nhypothesis: The hair has to be cleaned.\nAgent:The entailment class is ",
-                    "task_data": '{"text_a": "The drain is clogged with hair. It has to be cleaned.", "text_a_type": "premise", "text_b": "The hair has to be cleaned.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"], "num_demos": 0, "demos_pool_size": 0, "template": "templates.classification.multi_class.relation.default"}, "label": "entailment"}',
+                    "task_data": '{"text_a": "The drain is clogged with hair. It has to be cleaned.", "text_a_type": "premise", "text_b": "The hair has to be cleaned.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"], "num_demos": 0, "demos_pool_size": 0, "template": "templates.classification.multi_class.relation.default", "format": "formats.user_agent", "system_prompt": "system_prompts.empty"}, "label": "entailment"}',
                     "groups": [],
                     "subset": ["wnli"],
                 },
@@ -91,7 +91,7 @@ def test_benchmark(self):
                     "target": "not entailment",
                     "references": ["not entailment"],
                     "source": "Given a premise and hypothesis classify the entailment of the hypothesis to one of entailment, not entailment.\n\nUser:premise: Jane knocked on Susan's door but she did not answer.\nhypothesis: Susan did not answer.\nAgent:The entailment class is ",
-                    "task_data": '{"text_a": "Jane knocked on Susan\'s door but she did not answer.", "text_a_type": "premise", "text_b": "Susan did not answer.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"], "num_demos": 0, "demos_pool_size": 0, "template": "templates.classification.multi_class.relation.default"}, "label": "not entailment"}',
+                    "task_data": '{"text_a": "Jane knocked on Susan\'s door but she did not answer.", "text_a_type": "premise", "text_b": "Susan did not answer.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"], "num_demos": 0, "demos_pool_size": 0, "template": "templates.classification.multi_class.relation.default", "format": "formats.user_agent", "system_prompt": "system_prompts.empty"}, "label": "not entailment"}',
                     "groups": [],
                     "subset": ["wnli"],
                 },
diff --git a/tests/library/test_recipe.py b/tests/library/test_recipe.py
index 74463ebd5c..3c6aa154f2 100644
--- a/tests/library/test_recipe.py
+++ b/tests/library/test_recipe.py
@@ -162,7 +162,7 @@ def test_dataset_recipe_production_with_demos(self):
             "data_classification_policy": [],
             "postprocessors": ["processors.first_character"],
             "source": "<<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\n<</SYS>>\n\n\n\n\nUser: The following are multiple choice questions (with answers) about marketing.\n\nAlthough the content and quality can be as controlled as direct mail, response rates of this medium are lower because of the lack of a personal address mechanism. This media format is known as:\nA. Care lines.\nB. Direct mail.\nC. Inserts.\nD. Door to door.\nAnswer:\nAgent:  D\n\nUser: The following are multiple choice questions (with answers) about marketing.\n\n _____________ is a natural outcome when combining demographic and geographic variables.\nA. Geodemographics\nB. Product differentiation.\nC. ANSOFF matrix.\nD. Brand management.\nAnswer:\nAgent:  A\n\nUser: The following are multiple choice questions (with answers) about marketing.\n\nIn an organization, the group of people tasked with buying decisions is referred to as the _______________.\nA. Outsourcing unit.\nB. Procurement centre.\nC. Chief executive unit.\nD. Decision-making unit.\nAnswer:\nAgent:  D\n\n\nUser:The following are multiple choice questions (with answers) about testing.\n\nwhat?\nA. yes\nB. not\nC. maybe\nAnswer:\nAgent:",
-            "task_data": '{"topic": "testing", "question": "what?", "choices": ["yes", "not", "maybe"], "options": [" A", " B", " C"], "metadata": {"data_classification_policy": [], "demos_pool_size": 5, "num_demos": 3, "template": "templates.qa.multiple_choice.with_topic.lm_eval_harness"}, "demos": [{"topic": "marketing", "question": "Although the content and quality can be as controlled as direct mail, response rates of this medium are lower because of the lack of a personal address mechanism. This media format is known as:", "choices": ["Care lines.", "Direct mail.", "Inserts.", "Door to door."], "options": [" A", " B", " C", " D"], "metadata": {"data_classification_policy": ["public"]}, "answer": 3}, {"topic": "marketing", "question": " _____________ is a natural outcome when combining demographic and geographic variables.", "choices": ["Geodemographics", "Product differentiation.", "ANSOFF matrix.", "Brand management."], "options": [" A", " B", " C", " D"], "metadata": {"data_classification_policy": ["public"]}, "answer": 0}, {"topic": "marketing", "question": "In an organization, the group of people tasked with buying decisions is referred to as the _______________.", "choices": ["Outsourcing unit.", "Procurement centre.", "Chief executive unit.", "Decision-making unit."], "options": [" A", " B", " C", " D"], "metadata": {"data_classification_policy": ["public"]}, "answer": 3}]}',
+            "task_data": '{"topic": "testing", "question": "what?", "choices": ["yes", "not", "maybe"], "options": [" A", " B", " C"], "metadata": {"data_classification_policy": [], "demos_pool_size": 5, "num_demos": 3, "template": "templates.qa.multiple_choice.with_topic.lm_eval_harness",  "format": "formats.user_agent", "system_prompt": "system_prompts.models.llama"}, "demos": [{"topic": "marketing", "question": "Although the content and quality can be as controlled as direct mail, response rates of this medium are lower because of the lack of a personal address mechanism. This media format is known as:", "choices": ["Care lines.", "Direct mail.", "Inserts.", "Door to door."], "options": [" A", " B", " C", " D"], "metadata": {"data_classification_policy": ["public"]}, "answer": 3}, {"topic": "marketing", "question": " _____________ is a natural outcome when combining demographic and geographic variables.", "choices": ["Geodemographics", "Product differentiation.", "ANSOFF matrix.", "Brand management."], "options": [" A", " B", " C", " D"], "metadata": {"data_classification_policy": ["public"]}, "answer": 0}, {"topic": "marketing", "question": "In an organization, the group of people tasked with buying decisions is referred to as the _______________.", "choices": ["Outsourcing unit.", "Procurement centre.", "Chief executive unit.", "Decision-making unit."], "options": [" A", " B", " C", " D"], "metadata": {"data_classification_policy": ["public"]}, "answer": 3}]}',
             "groups": [],
             "subset": [],
             "media": {"images": [], "audios": []},
@@ -440,7 +440,7 @@ def test_empty_template(self):
             "target": "not entailment",
             "references": ["not entailment"],
             "source": "<<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\n<</SYS>>\n\n\n\n\nUser: Emma did not pass the ball to Janie although she was open., premise, She saw that Janie was open., hypothesis, entailment, not entailment, entailment\nAgent: not entailment\n\nUser: The foxes are getting in at night and attacking the chickens. I shall have to kill them., premise, I shall have to kill The foxes., hypothesis, entailment, not entailment, entailment\nAgent: not entailment\n\nUser: Fred is the only man alive who still remembers my father as an infant. When Fred first saw my father, he was twelve years old., premise, When Fred first saw my father, My father was twelve years old., hypothesis, entailment, not entailment, entailment\nAgent: entailment\n\n\nUser:Grace was happy to trade me her sweater for my jacket. She thinks it looks dowdy on her., premise, The sweater looks dowdy on her., hypothesis, entailment, not entailment, entailment\nAgent:",
-            "task_data": '{"text_a": "Grace was happy to trade me her sweater for my jacket. She thinks it looks dowdy on her.", "text_a_type": "premise", "text_b": "The sweater looks dowdy on her.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"], "num_demos": 3, "demos_pool_size": 100, "template": "templates.empty"}, "label": "not entailment", "demos": [{"text_a": "Emma did not pass the ball to Janie although she was open.", "text_a_type": "premise", "text_b": "She saw that Janie was open.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "not entailment"}, {"text_a": "The foxes are getting in at night and attacking the chickens. I shall have to kill them.", "text_a_type": "premise", "text_b": "I shall have to kill The foxes.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "not entailment"}, {"text_a": "Fred is the only man alive who still remembers my father as an infant. When Fred first saw my father, he was twelve years old.", "text_a_type": "premise", "text_b": "When Fred first saw my father, My father was twelve years old.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "entailment"}]}',
+            "task_data": '{"text_a": "Grace was happy to trade me her sweater for my jacket. She thinks it looks dowdy on her.", "text_a_type": "premise", "text_b": "The sweater looks dowdy on her.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"], "num_demos": 3, "demos_pool_size": 100, "template": "templates.empty",  "format": "formats.user_agent", "system_prompt": "system_prompts.models.llama"}, "label": "not entailment", "demos": [{"text_a": "Emma did not pass the ball to Janie although she was open.", "text_a_type": "premise", "text_b": "She saw that Janie was open.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "not entailment"}, {"text_a": "The foxes are getting in at night and attacking the chickens. I shall have to kill them.", "text_a_type": "premise", "text_b": "I shall have to kill The foxes.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "not entailment"}, {"text_a": "Fred is the only man alive who still remembers my father as an infant. When Fred first saw my father, he was twelve years old.", "text_a_type": "premise", "text_b": "When Fred first saw my father, My father was twelve years old.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "entailment"}]}',
             "groups": [],
             "subset": [],
         }
@@ -488,6 +488,8 @@ def test_key_val_template(self):
                 "demos_pool_size": 100,
                 "num_demos": 3,
                 "template": "templates.key_val",
+                "format": "formats.user_agent",
+                "system_prompt": "system_prompts.models.llama",
             },
             "label": "not entailment",
             "demos": [