Skip to content

Commit

Permalink
ICU-22834 Update tests to reflect MF2 schema in conformance repo
Browse files Browse the repository at this point in the history
This also updates the spec tests from the current version of the MFWG
repository and removes some duplicate tests.
Spec tests now reflect the message-format-wg repo as of
unicode-org/message-format-wg@5612f3b

It also updates both the ICU4C and ICU4J parsers to follow the
current test schema in the conformance repository.

This includes adding code to both parsers to allow `src` to be
either a single string or an array of strings (per
unicode-org/conformance#255 ),
and eliminating `srcs` in tests.

It also includes other changes to make updated spec tests pass:

ICU4C: Allow trailing whitespace for complex messages, due to spec change
ICU4C: Parse number literals correctly in Number::format
ICU4J: Allow trailing whitespace after complex body, per spec change
ICU4C: Fix bug that was assuming an .input variable can't have a reserved annotation
ICU4C: Fix bug where unsupported '.i' was parsed as an '.input'
ICU4C/ICU4J: Handle markup with space after the initial left curly brace
ICU4C: Check for duplicate variant errors
ICU4C/ICU4J: Handle leading whitespace in complex messages
ICU4J: Treat whitespace after .input keyword as optional
ICU4J: Don't format unannotated number literals as numbers
  • Loading branch information
catamorphism authored and mihnita committed Sep 18, 2024
1 parent bfc5354 commit 747d5ee
Show file tree
Hide file tree
Showing 66 changed files with 3,682 additions and 2,317 deletions.
3 changes: 2 additions & 1 deletion icu4c/source/common/unicode/utypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -599,12 +599,13 @@ typedef enum UErrorCode {
U_MF_OPERAND_MISMATCH_ERROR, /**< An operand provided to a function does not have the required form for that function @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
U_MF_UNSUPPORTED_STATEMENT_ERROR, /**< A message includes a reserved statement. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
U_MF_UNSUPPORTED_EXPRESSION_ERROR, /**< A message includes syntax reserved for future standardization or private implementation use. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
U_MF_DUPLICATE_VARIANT_ERROR, /**< A message includes a variant with the same key list as another variant. @internal ICU 76 technology preview @deprecated This API is for technology preview only. */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal formatting API error code.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_FMT_PARSE_ERROR_LIMIT = 0x10121,
U_FMT_PARSE_ERROR_LIMIT = 0x10122,
#endif // U_HIDE_DEPRECATED_API

/*
Expand Down
3 changes: 2 additions & 1 deletion icu4c/source/common/utypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,8 @@ _uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = {
"U_MF_DUPLICATE_DECLARATION_ERROR",
"U_MF_OPERAND_MISMATCH_ERROR",
"U_MF_UNSUPPORTED_STATEMENT_ERROR",
"U_MF_UNSUPPORTED_EXPRESSION_ERROR"
"U_MF_UNSUPPORTED_EXPRESSION_ERROR",
"U_MF_DUPLICATE_VARIANT_ERROR"
};

static const char * const
Expand Down
29 changes: 28 additions & 1 deletion icu4c/source/i18n/messageformat2_checker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Checks data model errors
The following are checked here:
Variant Key Mismatch
Duplicate Variant
Missing Fallback Variant (called NonexhaustivePattern here)
Missing Selector Annotation
Duplicate Declaration
Expand Down Expand Up @@ -162,6 +163,7 @@ void Checker::checkVariants(UErrorCode& status) {

// Check that one variant includes only wildcards
bool defaultExists = false;
bool duplicatesExist = false;

for (int32_t i = 0; i < dataModel.numVariants(); i++) {
const SelectorKeys& k = variants[i].getKeys();
Expand All @@ -173,10 +175,35 @@ void Checker::checkVariants(UErrorCode& status) {
return;
}
defaultExists |= areDefaultKeys(keys, len);

// Check if this variant's keys are duplicated by any other variant's keys
if (!duplicatesExist) {
// This check takes quadratic time, but it can be optimized if checking
// this property turns out to be a bottleneck.
for (int32_t j = 0; j < i; j++) {
const SelectorKeys& k1 = variants[j].getKeys();
const Key* keys1 = k1.getKeysInternal();
bool allEqual = true;
// This variant was already checked,
// so we know keys1.len == len
for (int32_t kk = 0; kk < len; kk++) {
if (!(keys[kk] == keys1[kk])) {
allEqual = false;
break;
}
}
if (allEqual) {
duplicatesExist = true;
}
}
}
}

if (duplicatesExist) {
errors.addError(StaticErrorType::DuplicateVariant, status);
}
if (!defaultExists) {
errors.addError(StaticErrorType::NonexhaustivePattern, status);
return;
}
}

Expand Down
34 changes: 17 additions & 17 deletions icu4c/source/i18n/messageformat2_data_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,9 @@ bool Key::operator==(const Key& other) const {
if (isWildcard()) {
return other.isWildcard();
}
if (other.isWildcard()) {
return false;
}
return (asLiteral() == other.asLiteral());
}

Expand Down Expand Up @@ -833,31 +836,28 @@ const Expression& Binding::getValue() const {
} else {
const Operator* rator = rhs.getOperator(errorCode);
bool hasOperator = U_SUCCESS(errorCode);
if (hasOperator && rator->isReserved()) {
errorCode = U_INVALID_STATE_ERROR;
// Clear error code -- the "error" from the absent operator
// is handled
errorCode = U_ZERO_ERROR;
b = Binding(variableName, std::move(rhs));
b.local = false;
if (hasOperator) {
rator = b.getValue().getOperator(errorCode);
U_ASSERT(U_SUCCESS(errorCode));
b.annotation = &rator->contents;
} else {
// Clear error code -- the "error" from the absent operator
// is handled
errorCode = U_ZERO_ERROR;
b = Binding(variableName, std::move(rhs));
b.local = false;
if (hasOperator) {
rator = b.getValue().getOperator(errorCode);
U_ASSERT(U_SUCCESS(errorCode));
b.annotation = std::get_if<Callable>(&(rator->contents));
} else {
b.annotation = nullptr;
}
U_ASSERT(!hasOperator || b.annotation != nullptr);
b.annotation = nullptr;
}
U_ASSERT(!hasOperator || b.annotation != nullptr);
}
}
return b;
}

const OptionMap& Binding::getOptionsInternal() const {
U_ASSERT(annotation != nullptr);
return annotation->getOptions();
U_ASSERT(std::holds_alternative<Callable>(*annotation));
return std::get_if<Callable>(annotation)->getOptions();
}

void Binding::updateAnnotation() {
Expand All @@ -867,7 +867,7 @@ void Binding::updateAnnotation() {
return;
}
U_ASSERT(U_SUCCESS(localErrorCode) && !rator->isReserved());
annotation = std::get_if<Callable>(&(rator->contents));
annotation = &rator->contents;
}

Binding::Binding(const Binding& other) : var(other.var), expr(other.expr), local(other.local) {
Expand Down
8 changes: 8 additions & 0 deletions icu4c/source/i18n/messageformat2_errors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,10 @@ namespace message2 {
status = U_MF_VARIANT_KEY_MISMATCH_ERROR;
break;
}
case StaticErrorType::DuplicateVariant: {
status = U_MF_DUPLICATE_VARIANT_ERROR;
break;
}
case StaticErrorType::NonexhaustivePattern: {
status = U_MF_NONEXHAUSTIVE_PATTERN_ERROR;
break;
Expand Down Expand Up @@ -211,6 +215,10 @@ namespace message2 {
dataModelError = true;
break;
}
case StaticErrorType::DuplicateVariant: {
dataModelError = true;
break;
}
case StaticErrorType::NonexhaustivePattern: {
dataModelError = true;
break;
Expand Down
1 change: 1 addition & 0 deletions icu4c/source/i18n/messageformat2_errors.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ namespace message2 {
enum StaticErrorType {
DuplicateDeclarationError,
DuplicateOptionName,
DuplicateVariant,
MissingSelectorAnnotation,
NonexhaustivePattern,
SyntaxError,
Expand Down
41 changes: 35 additions & 6 deletions icu4c/source/i18n/messageformat2_function_registry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,14 @@

#if !UCONFIG_NO_MF2

#include <math.h>

#include "unicode/dtptngen.h"
#include "unicode/messageformat2_data_model_names.h"
#include "unicode/messageformat2_function_registry.h"
#include "unicode/smpdtfmt.h"
#include "charstr.h"
#include "double-conversion.h"
#include "messageformat2_allocation.h"
#include "messageformat2_function_registry_internal.h"
#include "messageformat2_macros.h"
Expand Down Expand Up @@ -421,25 +424,51 @@ static FormattedPlaceholder notANumber(const FormattedPlaceholder& input) {
return FormattedPlaceholder(input, FormattedValue(UnicodeString("NaN")));
}

static FormattedPlaceholder stringAsNumber(const number::LocalizedNumberFormatter& nf, const FormattedPlaceholder& input, UErrorCode& errorCode) {
static double parseNumberLiteral(const FormattedPlaceholder& input, UErrorCode& errorCode) {
if (U_FAILURE(errorCode)) {
return {};
}

double numberValue;
// Copying string to avoid GCC dangling-reference warning
// (although the reference is safe)
UnicodeString inputStr = input.asFormattable().getString(errorCode);
// Precondition: `input`'s source Formattable has type string
if (U_FAILURE(errorCode)) {
return {};
}
UErrorCode localErrorCode = U_ZERO_ERROR;
strToDouble(inputStr, numberValue, localErrorCode);
if (U_FAILURE(localErrorCode)) {

// Hack: Check for cases that are forbidden by the MF2 grammar
// but allowed by StringToDouble
int32_t len = inputStr.length();

if (len > 0 && ((inputStr[0] == '+')
|| (inputStr[0] == '0' && len > 1 && inputStr[1] != '.')
|| (inputStr[len - 1] == '.')
|| (inputStr[0] == '.'))) {
errorCode = U_MF_OPERAND_MISMATCH_ERROR;
return 0;
}

// Otherwise, convert to double using double_conversion::StringToDoubleConverter
using namespace double_conversion;
int processedCharactersCount = 0;
StringToDoubleConverter converter(0, 0, 0, "", "");
double result =
converter.StringToDouble(reinterpret_cast<const uint16_t*>(inputStr.getBuffer()),
len,
&processedCharactersCount);
if (processedCharactersCount != len) {
errorCode = U_MF_OPERAND_MISMATCH_ERROR;
}
return result;
}

static FormattedPlaceholder tryParsingNumberLiteral(const number::LocalizedNumberFormatter& nf, const FormattedPlaceholder& input, UErrorCode& errorCode) {
double numberValue = parseNumberLiteral(input, errorCode);
if (U_FAILURE(errorCode)) {
return notANumber(input);
}

UErrorCode savedStatus = errorCode;
number::FormattedNumber result = nf.formatDouble(numberValue, errorCode);
// Ignore U_USING_DEFAULT_WARNING
Expand Down Expand Up @@ -590,7 +619,7 @@ FormattedPlaceholder StandardFunctions::Number::format(FormattedPlaceholder&& ar
}
case UFMT_STRING: {
// Try to parse the string as a number
return stringAsNumber(realFormatter, arg, errorCode);
return tryParsingNumberLiteral(realFormatter, arg, errorCode);
}
default: {
// Other types can't be parsed as a number
Expand Down
16 changes: 4 additions & 12 deletions icu4c/source/i18n/messageformat2_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,19 +60,11 @@ using namespace pluralimpl;
// Fallback
#define REPLACEMENT ((UChar32) 0xFFFD)

// MessageFormat2 uses four keywords: `.input`, `.local`, `.when`, and `.match`.
// MessageFormat2 uses three keywords: `.input`, `.local`, and `.match`.

static constexpr UChar32 ID_INPUT[] = {
0x2E, 0x69, 0x6E, 0x70, 0x75, 0x74, 0 /* ".input" */
};

static constexpr UChar32 ID_LOCAL[] = {
0x2E, 0x6C, 0x6F, 0x63, 0x61, 0x6C, 0 /* ".local" */
};

static constexpr UChar32 ID_MATCH[] = {
0x2E, 0x6D, 0x61, 0x74, 0x63, 0x68, 0 /* ".match" */
};
static constexpr std::u16string_view ID_INPUT = u".input";
static constexpr std::u16string_view ID_LOCAL = u".local";
static constexpr std::u16string_view ID_MATCH = u".match";

// Returns immediately if `errorCode` indicates failure
#define CHECK_ERROR(errorCode) \
Expand Down
Loading

0 comments on commit 747d5ee

Please sign in to comment.