Skip to content

Commit 309c64d

Browse files
authored
Add notion of \N to formats, to fix format new line clashes (#751)
* Add notion of \N to formats, to fix format new line clashes

  Signed-off-by: Elron Bandel <[email protected]>

* Remove old code

  Signed-off-by: Elron Bandel <[email protected]>

* Fix things

  Signed-off-by: Elron Bandel <[email protected]>

---------

Signed-off-by: Elron Bandel <[email protected]>
1 parent f6717fe commit 309c64d

File tree

108 files changed

+336
-149
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

108 files changed

+336
-149
lines changed

docs/docs/adding_format.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.. _adding_fromat:
1+
.. _adding_format:
22

33
.. note::
44

prepare/templates/completion/multiple_choice/templates.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
MultipleChoiceTemplate(
66
input_format="{context}",
77
target_field="answer",
8-
choices_seperator="",
8+
choices_separator="",
99
source_choice_format="{choice_text}",
1010
target_choice_format="{choice_text}",
1111
),
@@ -26,7 +26,7 @@
2626
MultipleChoiceTemplate(
2727
input_format=input_format,
2828
target_field="answer",
29-
choices_seperator="\n",
29+
choices_separator="\n",
3030
postprocessors=["processors.first_character"],
3131
),
3232
"templates.completion.multiple_choice.enumerated",
@@ -39,7 +39,7 @@
3939
input_format="Context: {context}...\nChoices:\n{choices}",
4040
target_prefix="Answer: ",
4141
target_field="answer",
42-
choices_seperator="\n",
42+
choices_separator="\n",
4343
postprocessors=["processors.first_character"],
4444
),
4545
"templates.completion.multiple_choice.standard",
@@ -52,7 +52,7 @@
5252
input_format="{context_type}: {context}\nChoices:\n{choices}",
5353
target_prefix="Answer: ",
5454
target_field="answer",
55-
choices_seperator="\n",
55+
choices_separator="\n",
5656
title_fields=["context_type"],
5757
postprocessors=["processors.first_character"],
5858
),

prepare/templates/evaluation/preference.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
input_format="Instruction:\n{instruction}\n{input_type}:\n{input}\nResponses:\n{choices}",
88
target_prefix="{output_type}:\n",
99
target_field="output_choice",
10-
choices_seperator="\n",
10+
choices_separator="\n",
1111
postprocessors=["processors.to_string_stripped", "processors.first_character"],
1212
shuffle_choices=True,
1313
title_fields=["input_type", "output_type"],

prepare/templates/qa/multiple_choice/templates.py

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -109,15 +109,15 @@
109109
},
110110
},
111111
}
112-
template_handels = []
112+
template_handles = []
113113

114114
for template_type, template_type_groups in templates.items():
115115
for benchmark_name, template_groups in template_type_groups.items():
116116
for language, input_format in template_groups.items():
117117
template = MultipleChoiceTemplate(
118118
input_format=input_format,
119119
target_field="answer",
120-
choices_seperator="\n",
120+
choices_separator="\n",
121121
target_choice_format=" {choice_numeral}"
122122
if "lm_eval_harness" in benchmark_name
123123
else "{choice_numeral}",
@@ -132,18 +132,18 @@
132132
overwrite=True,
133133
)
134134

135-
template_handels.append(
135+
template_handles.append(
136136
{
137137
"handle": template_handle,
138138
"template_type": template_type,
139139
"language": language,
140140
}
141141
)
142142

143-
template_handels = pd.DataFrame(template_handels)
144-
for template_type in template_handels.template_type.unique():
145-
for lang in template_handels.language.unique():
146-
template_handle_list = template_handels.query(
143+
template_handles = pd.DataFrame(template_handles)
144+
for template_type in template_handles.template_type.unique():
145+
for lang in template_handles.language.unique():
146+
template_handle_list = template_handles.query(
147147
f'language=="{lang}" and template_type=="{template_type}"'
148148
).handle.tolist()
149149

@@ -167,7 +167,7 @@
167167
MultipleChoiceTemplate(
168168
input_format=input_format,
169169
target_field="answer",
170-
choices_seperator="\n",
170+
choices_separator="\n",
171171
postprocessors=["processors.first_character"],
172172
),
173173
"templates.qa.multiple_choice.with_topic.mmlu",
@@ -179,7 +179,7 @@
179179
MultipleChoiceTemplate(
180180
input_format=input_format,
181181
target_field="answer",
182-
choices_seperator="\n",
182+
choices_separator="\n",
183183
postprocessors=["processors.first_character"],
184184
),
185185
"templates.qa.multiple_choice.with_context.with_topic.mmlu",
@@ -193,7 +193,7 @@
193193
MultipleChoiceTemplate(
194194
input_format=input_format,
195195
target_field="answer",
196-
choices_seperator="\n",
196+
choices_separator="\n",
197197
postprocessors=["processors.first_character"],
198198
),
199199
"templates.qa.multiple_choice.with_topic.helm",
@@ -205,7 +205,7 @@
205205
MultipleChoiceTemplate(
206206
input_format=input_format,
207207
target_field="answer",
208-
choices_seperator="\n",
208+
choices_separator="\n",
209209
postprocessors=["processors.first_character"],
210210
),
211211
"templates.qa.multiple_choice.with_context.with_topic.helm",
@@ -219,7 +219,7 @@
219219
MultipleChoiceTemplate(
220220
input_format=input_format,
221221
target_field="answer",
222-
choices_seperator="\n",
222+
choices_separator="\n",
223223
postprocessors=["processors.first_character"],
224224
),
225225
"templates.qa.multiple_choice.lm_eval_harness",
@@ -232,7 +232,7 @@
232232
input_format="Question:\n{question}\nChoices:\n{choices}",
233233
target_prefix="Answer:\n",
234234
target_field="answer",
235-
choices_seperator="\n",
235+
choices_separator="\n",
236236
postprocessors=["processors.to_string_stripped", "processors.first_character"],
237237
),
238238
"templates.qa.multiple_choice.title",
@@ -245,7 +245,7 @@
245245
input_format="Question:\n{question}\nChoices:\n{choices}",
246246
target_prefix="Answer:\n",
247247
target_field="answer",
248-
choices_seperator="\n",
248+
choices_separator="\n",
249249
target_choice_format="{choice_numeral}. {choice_text}",
250250
postprocessors=[
251251
"processors.take_first_non_empty_line",
@@ -262,7 +262,7 @@
262262
MultipleChoiceTemplate(
263263
input_format=input_format,
264264
target_field="answer",
265-
choices_seperator="\n",
265+
choices_separator="\n",
266266
postprocessors=["processors.first_character"],
267267
),
268268
"templates.qa.multiple_choice.with_context.lm_eval_harness",
@@ -275,7 +275,7 @@
275275
input_format="{context_type}:\n{context}\nQuestion:\n{question}\nChoices:\n{choices}",
276276
target_prefix="Answer:\n",
277277
target_field="answer",
278-
choices_seperator="\n",
278+
choices_separator="\n",
279279
postprocessors=["processors.to_string_stripped", "processors.first_character"],
280280
title_fields=["context_type"],
281281
),
@@ -289,7 +289,7 @@
289289
input_format="{context_type}:\n{context}\nQuestion:\n{question}\nChoices:\n{choices}",
290290
target_prefix="Answer:\n",
291291
target_field="answer",
292-
choices_seperator="\n",
292+
choices_separator="\n",
293293
target_choice_format="{choice_numeral}. {choice_text}",
294294
postprocessors=[
295295
"processors.take_first_non_empty_line",
@@ -308,7 +308,7 @@
308308
MultipleChoiceTemplate(
309309
input_format=input_format,
310310
target_field="answer",
311-
choices_seperator="\n",
311+
choices_separator="\n",
312312
postprocessors=["processors.first_character"],
313313
),
314314
"templates.qa.multiple_choice.with_topic.fm_eval",
@@ -320,7 +320,7 @@
320320
MultipleChoiceTemplate(
321321
input_format=input_format,
322322
target_field="answer",
323-
choices_seperator="\n",
323+
choices_separator="\n",
324324
postprocessors=["processors.first_character"],
325325
),
326326
"templates.qa.multiple_choice.with_context.with_topic.fm_eval",
@@ -333,7 +333,7 @@
333333
input_format="{context_type}:\n{context}\nQuestion:\n{question}\nChoices:\n{choices}",
334334
target_prefix="Answer:\n",
335335
target_field="answer",
336-
choices_seperator="\n",
336+
choices_separator="\n",
337337
postprocessors=["processors.to_string_stripped", "processors.first_character"],
338338
title_fields=["context_type"],
339339
),
@@ -347,7 +347,7 @@
347347
input_format="{context_type}:\n{context}\nQuestion:\n{question}\nChoices:\n{choices}",
348348
target_prefix="Answer:\n",
349349
target_field="answer",
350-
choices_seperator="\n",
350+
choices_separator="\n",
351351
target_choice_format="{choice_numeral}. {choice_text}",
352352
postprocessors=[
353353
"processors.take_first_non_empty_line",
@@ -365,7 +365,7 @@
365365
input_format="Question:\n{question}\nChoices:\n{choices}",
366366
target_prefix="Answer:\n",
367367
target_field="answer",
368-
choices_seperator="\n",
368+
choices_separator="\n",
369369
postprocessors=["processors.to_string_stripped", "processors.first_character"],
370370
),
371371
"templates.qa.multiple_choice.with_topic.title",
@@ -378,7 +378,7 @@
378378
input_format="Question:\n{question}\nChoices:\n{choices}",
379379
target_prefix="Answer:\n",
380380
target_field="answer",
381-
choices_seperator="\n",
381+
choices_separator="\n",
382382
target_choice_format="{choice_numeral}. {choice_text}",
383383
postprocessors=[
384384
"processors.take_first_non_empty_line",

prepare/templates/selection/by_attribute.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
target_prefix="Most {required_attribute}:\n",
99
target_field="choice",
1010
choices_field="choices_texts",
11-
choices_seperator="\n",
11+
choices_separator="\n",
1212
postprocessors=["processors.to_string_stripped", "processors.first_character"],
1313
shuffle_choices=True,
1414
title_fields=["choices_text_type"],

prepare/templates/standard.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
)
99

1010
add_to_catalog(
11-
KeyValTemplate(pairs_seperator="\n", use_keys_for_outputs=True),
11+
KeyValTemplate(pairs_separator="\n", use_keys_for_outputs=True),
1212
"templates.key_val_with_new_lines",
1313
overwrite=True,
1414
)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,5 +111,5 @@ extend-immutable-calls = ["fastapi.Depends", "fastapi.params.Depends", "fastapi.
111111
ignore-words-list = 'rouge,ot,ans,nd'
112112
check-filenames = true
113113
check-hidden = false
114-
regex = "([A-Z]?[a-z]+|[A-Z]+(?![a-z])|_[a-z]+)"
114+
regex = "(?<![a-z])[a-z'`]+|[A-Z][a-z'`]*|[a-z]+'[a-z]*|[a-z]+(?=[_-])|[a-z]+(?=[A-Z])|\\d+"
115115
skip = '*cards/trec*,*cards/belebele*,*cards/amazon_mass*,*cards/reuters21578*,*egg-info*,*/logs/*'

src/unitxt/catalog.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def path(self, artifact_identifier: str):
3737
assert (
3838
artifact_identifier.strip()
3939
), "artifact_identifier should not be an empty string."
40-
parts = artifact_identifier.split(constants.catalog_hirarchy_sep)
40+
parts = artifact_identifier.split(constants.catalog_hierarchy_sep)
4141
parts[-1] = parts[-1] + ".json"
4242
return os.path.join(self.location, *parts)
4343

@@ -116,7 +116,7 @@ def __contains__(self, artifact_identifier: str):
116116

117117
def verify_legal_catalog_name(name):
118118
assert re.match(
119-
r"^[\w" + constants.catalog_hirarchy_sep + "]+$", name
119+
r"^[\w" + constants.catalog_hierarchy_sep + "]+$", name
120120
), f'Artifict name ("{name}") should be alphanumeric. Use "." for nesting (e.g. myfolder.my_artifact)'
121121

122122

src/unitxt/catalog/templates/completion/multiple_choice/enumerated.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"type": "multiple_choice_template",
33
"input_format": "Pick the best ending to the context.\nContext: {context}...\nChoices:\n{choices}\nAnswer:",
44
"target_field": "answer",
5-
"choices_seperator": "\n",
5+
"choices_separator": "\n",
66
"postprocessors": [
77
"processors.first_character"
88
]

src/unitxt/catalog/templates/completion/multiple_choice/simple.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"type": "multiple_choice_template",
33
"input_format": "{context}",
44
"target_field": "answer",
5-
"choices_seperator": "",
5+
"choices_separator": "",
66
"source_choice_format": "{choice_text}",
77
"target_choice_format": "{choice_text}"
88
}

0 commit comments

Comments
 (0)