Skip to content

Commit 747d5ee

Browse files
catamorphismmihnita
authored andcommitted
ICU-22834 Update tests to reflect MF2 schema in conformance repo
This also updates the spec tests from the current version of the MFWG repository and removes some duplicate tests. Spec tests now reflect the message-format-wg repo as of unicode-org/message-format-wg@5612f3b It also updates both the ICU4C and ICU4J parsers to follow the current test schema in the conformance repository. This includes adding code to both parsers to allow `src` to be either a single string or an array of strings (per unicode-org/conformance#255 ), and eliminating `srcs` in tests. It also includes other changes to make updated spec tests pass: ICU4C: Allow trailing whitespace for complex messages, due to spec change ICU4C: Parse number literals correctly in Number::format ICU4J: Allow trailing whitespace after complex body, per spec change ICU4C: Fix bug that was assuming an .input variable can't have a reserved annotation ICU4C: Fix bug where unsupported '.i' was parsed as an '.input' ICU4C/ICU4J: Handle markup with space after the initial left curly brace ICU4C: Check for duplicate variant errors ICU4C/ICU4J: Handle leading whitespace in complex messages ICU4J: Treat whitespace after .input keyword as optional ICU4J: Don't format unannotated number literals as numbers
1 parent bfc5354 commit 747d5ee

File tree

66 files changed

+3682
-2317
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

66 files changed

+3682
-2317
lines changed

icu4c/source/common/unicode/utypes.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -599,12 +599,13 @@ typedef enum UErrorCode {
599599
U_MF_OPERAND_MISMATCH_ERROR, /**< An operand provided to a function does not have the required form for that function @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
600600
U_MF_UNSUPPORTED_STATEMENT_ERROR, /**< A message includes a reserved statement. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
601601
U_MF_UNSUPPORTED_EXPRESSION_ERROR, /**< A message includes syntax reserved for future standardization or private implementation use. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
602+
U_MF_DUPLICATE_VARIANT_ERROR, /**< A message includes a variant with the same key list as another variant. @internal ICU 76 technology preview @deprecated This API is for technology preview only. */
602603
#ifndef U_HIDE_DEPRECATED_API
603604
/**
604605
* One more than the highest normal formatting API error code.
605606
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
606607
*/
607-
U_FMT_PARSE_ERROR_LIMIT = 0x10121,
608+
U_FMT_PARSE_ERROR_LIMIT = 0x10122,
608609
#endif // U_HIDE_DEPRECATED_API
609610

610611
/*

icu4c/source/common/utypes.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,8 @@ _uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = {
141141
"U_MF_DUPLICATE_DECLARATION_ERROR",
142142
"U_MF_OPERAND_MISMATCH_ERROR",
143143
"U_MF_UNSUPPORTED_STATEMENT_ERROR",
144-
"U_MF_UNSUPPORTED_EXPRESSION_ERROR"
144+
"U_MF_UNSUPPORTED_EXPRESSION_ERROR",
145+
"U_MF_DUPLICATE_VARIANT_ERROR"
145146
};
146147

147148
static const char * const

icu4c/source/i18n/messageformat2_checker.cpp

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ Checks data model errors
2222
2323
The following are checked here:
2424
Variant Key Mismatch
25+
Duplicate Variant
2526
Missing Fallback Variant (called NonexhaustivePattern here)
2627
Missing Selector Annotation
2728
Duplicate Declaration
@@ -162,6 +163,7 @@ void Checker::checkVariants(UErrorCode& status) {
162163

163164
// Check that one variant includes only wildcards
164165
bool defaultExists = false;
166+
bool duplicatesExist = false;
165167

166168
for (int32_t i = 0; i < dataModel.numVariants(); i++) {
167169
const SelectorKeys& k = variants[i].getKeys();
@@ -173,10 +175,35 @@ void Checker::checkVariants(UErrorCode& status) {
173175
return;
174176
}
175177
defaultExists |= areDefaultKeys(keys, len);
178+
179+
// Check if this variant's keys are duplicated by any other variant's keys
180+
if (!duplicatesExist) {
181+
// This check takes quadratic time, but it can be optimized if checking
182+
// this property turns out to be a bottleneck.
183+
for (int32_t j = 0; j < i; j++) {
184+
const SelectorKeys& k1 = variants[j].getKeys();
185+
const Key* keys1 = k1.getKeysInternal();
186+
bool allEqual = true;
187+
// This variant was already checked,
188+
// so we know keys1.len == len
189+
for (int32_t kk = 0; kk < len; kk++) {
190+
if (!(keys[kk] == keys1[kk])) {
191+
allEqual = false;
192+
break;
193+
}
194+
}
195+
if (allEqual) {
196+
duplicatesExist = true;
197+
}
198+
}
199+
}
200+
}
201+
202+
if (duplicatesExist) {
203+
errors.addError(StaticErrorType::DuplicateVariant, status);
176204
}
177205
if (!defaultExists) {
178206
errors.addError(StaticErrorType::NonexhaustivePattern, status);
179-
return;
180207
}
181208
}
182209

icu4c/source/i18n/messageformat2_data_model.cpp

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,9 @@ bool Key::operator==(const Key& other) const {
186186
if (isWildcard()) {
187187
return other.isWildcard();
188188
}
189+
if (other.isWildcard()) {
190+
return false;
191+
}
189192
return (asLiteral() == other.asLiteral());
190193
}
191194

@@ -833,31 +836,28 @@ const Expression& Binding::getValue() const {
833836
} else {
834837
const Operator* rator = rhs.getOperator(errorCode);
835838
bool hasOperator = U_SUCCESS(errorCode);
836-
if (hasOperator && rator->isReserved()) {
837-
errorCode = U_INVALID_STATE_ERROR;
839+
// Clear error code -- the "error" from the absent operator
840+
// is handled
841+
errorCode = U_ZERO_ERROR;
842+
b = Binding(variableName, std::move(rhs));
843+
b.local = false;
844+
if (hasOperator) {
845+
rator = b.getValue().getOperator(errorCode);
846+
U_ASSERT(U_SUCCESS(errorCode));
847+
b.annotation = &rator->contents;
838848
} else {
839-
// Clear error code -- the "error" from the absent operator
840-
// is handled
841-
errorCode = U_ZERO_ERROR;
842-
b = Binding(variableName, std::move(rhs));
843-
b.local = false;
844-
if (hasOperator) {
845-
rator = b.getValue().getOperator(errorCode);
846-
U_ASSERT(U_SUCCESS(errorCode));
847-
b.annotation = std::get_if<Callable>(&(rator->contents));
848-
} else {
849-
b.annotation = nullptr;
850-
}
851-
U_ASSERT(!hasOperator || b.annotation != nullptr);
849+
b.annotation = nullptr;
852850
}
851+
U_ASSERT(!hasOperator || b.annotation != nullptr);
853852
}
854853
}
855854
return b;
856855
}
857856

858857
const OptionMap& Binding::getOptionsInternal() const {
859858
U_ASSERT(annotation != nullptr);
860-
return annotation->getOptions();
859+
U_ASSERT(std::holds_alternative<Callable>(*annotation));
860+
return std::get_if<Callable>(annotation)->getOptions();
861861
}
862862

863863
void Binding::updateAnnotation() {
@@ -867,7 +867,7 @@ void Binding::updateAnnotation() {
867867
return;
868868
}
869869
U_ASSERT(U_SUCCESS(localErrorCode) && !rator->isReserved());
870-
annotation = std::get_if<Callable>(&(rator->contents));
870+
annotation = &rator->contents;
871871
}
872872

873873
Binding::Binding(const Binding& other) : var(other.var), expr(other.expr), local(other.local) {

icu4c/source/i18n/messageformat2_errors.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,10 @@ namespace message2 {
135135
status = U_MF_VARIANT_KEY_MISMATCH_ERROR;
136136
break;
137137
}
138+
case StaticErrorType::DuplicateVariant: {
139+
status = U_MF_DUPLICATE_VARIANT_ERROR;
140+
break;
141+
}
138142
case StaticErrorType::NonexhaustivePattern: {
139143
status = U_MF_NONEXHAUSTIVE_PATTERN_ERROR;
140144
break;
@@ -211,6 +215,10 @@ namespace message2 {
211215
dataModelError = true;
212216
break;
213217
}
218+
case StaticErrorType::DuplicateVariant: {
219+
dataModelError = true;
220+
break;
221+
}
214222
case StaticErrorType::NonexhaustivePattern: {
215223
dataModelError = true;
216224
break;

icu4c/source/i18n/messageformat2_errors.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ namespace message2 {
5454
enum StaticErrorType {
5555
DuplicateDeclarationError,
5656
DuplicateOptionName,
57+
DuplicateVariant,
5758
MissingSelectorAnnotation,
5859
NonexhaustivePattern,
5960
SyntaxError,

icu4c/source/i18n/messageformat2_function_registry.cpp

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,14 @@
77

88
#if !UCONFIG_NO_MF2
99

10+
#include <math.h>
11+
1012
#include "unicode/dtptngen.h"
1113
#include "unicode/messageformat2_data_model_names.h"
1214
#include "unicode/messageformat2_function_registry.h"
1315
#include "unicode/smpdtfmt.h"
1416
#include "charstr.h"
17+
#include "double-conversion.h"
1518
#include "messageformat2_allocation.h"
1619
#include "messageformat2_function_registry_internal.h"
1720
#include "messageformat2_macros.h"
@@ -421,25 +424,51 @@ static FormattedPlaceholder notANumber(const FormattedPlaceholder& input) {
421424
return FormattedPlaceholder(input, FormattedValue(UnicodeString("NaN")));
422425
}
423426

424-
static FormattedPlaceholder stringAsNumber(const number::LocalizedNumberFormatter& nf, const FormattedPlaceholder& input, UErrorCode& errorCode) {
427+
static double parseNumberLiteral(const FormattedPlaceholder& input, UErrorCode& errorCode) {
425428
if (U_FAILURE(errorCode)) {
426429
return {};
427430
}
428431

429-
double numberValue;
430432
// Copying string to avoid GCC dangling-reference warning
431433
// (although the reference is safe)
432434
UnicodeString inputStr = input.asFormattable().getString(errorCode);
433435
// Precondition: `input`'s source Formattable has type string
434436
if (U_FAILURE(errorCode)) {
435437
return {};
436438
}
437-
UErrorCode localErrorCode = U_ZERO_ERROR;
438-
strToDouble(inputStr, numberValue, localErrorCode);
439-
if (U_FAILURE(localErrorCode)) {
439+
440+
// Hack: Check for cases that are forbidden by the MF2 grammar
441+
// but allowed by StringToDouble
442+
int32_t len = inputStr.length();
443+
444+
if (len > 0 && ((inputStr[0] == '+')
445+
|| (inputStr[0] == '0' && len > 1 && inputStr[1] != '.')
446+
|| (inputStr[len - 1] == '.')
447+
|| (inputStr[0] == '.'))) {
440448
errorCode = U_MF_OPERAND_MISMATCH_ERROR;
449+
return 0;
450+
}
451+
452+
// Otherwise, convert to double using double_conversion::StringToDoubleConverter
453+
using namespace double_conversion;
454+
int processedCharactersCount = 0;
455+
StringToDoubleConverter converter(0, 0, 0, "", "");
456+
double result =
457+
converter.StringToDouble(reinterpret_cast<const uint16_t*>(inputStr.getBuffer()),
458+
len,
459+
&processedCharactersCount);
460+
if (processedCharactersCount != len) {
461+
errorCode = U_MF_OPERAND_MISMATCH_ERROR;
462+
}
463+
return result;
464+
}
465+
466+
static FormattedPlaceholder tryParsingNumberLiteral(const number::LocalizedNumberFormatter& nf, const FormattedPlaceholder& input, UErrorCode& errorCode) {
467+
double numberValue = parseNumberLiteral(input, errorCode);
468+
if (U_FAILURE(errorCode)) {
441469
return notANumber(input);
442470
}
471+
443472
UErrorCode savedStatus = errorCode;
444473
number::FormattedNumber result = nf.formatDouble(numberValue, errorCode);
445474
// Ignore U_USING_DEFAULT_WARNING
@@ -590,7 +619,7 @@ FormattedPlaceholder StandardFunctions::Number::format(FormattedPlaceholder&& ar
590619
}
591620
case UFMT_STRING: {
592621
// Try to parse the string as a number
593-
return stringAsNumber(realFormatter, arg, errorCode);
622+
return tryParsingNumberLiteral(realFormatter, arg, errorCode);
594623
}
595624
default: {
596625
// Other types can't be parsed as a number

icu4c/source/i18n/messageformat2_macros.h

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -60,19 +60,11 @@ using namespace pluralimpl;
6060
// Fallback
6161
#define REPLACEMENT ((UChar32) 0xFFFD)
6262

63-
// MessageFormat2 uses four keywords: `.input`, `.local`, `.when`, and `.match`.
63+
// MessageFormat2 uses three keywords: `.input`, `.local`, and `.match`.
6464

65-
static constexpr UChar32 ID_INPUT[] = {
66-
0x2E, 0x69, 0x6E, 0x70, 0x75, 0x74, 0 /* ".input" */
67-
};
68-
69-
static constexpr UChar32 ID_LOCAL[] = {
70-
0x2E, 0x6C, 0x6F, 0x63, 0x61, 0x6C, 0 /* ".local" */
71-
};
72-
73-
static constexpr UChar32 ID_MATCH[] = {
74-
0x2E, 0x6D, 0x61, 0x74, 0x63, 0x68, 0 /* ".match" */
75-
};
65+
static constexpr std::u16string_view ID_INPUT = u".input";
66+
static constexpr std::u16string_view ID_LOCAL = u".local";
67+
static constexpr std::u16string_view ID_MATCH = u".match";
7668

7769
// Returns immediately if `errorCode` indicates failure
7870
#define CHECK_ERROR(errorCode) \

0 commit comments

Comments
 (0)