Skip to content

Commit

Permalink
[EN DateTime V2] Added support for cases like "April ninth through 15th" (#2905) (#2994)
Browse files Browse the repository at this point in the history

* Added support for cases like "April ninth through 15th" (#2905)

* Modified fix to use regexes instead of OrdinalExtractor according to review

* Removed DateContext modifications

* Corrected bug in Hindi Ordinal extraction

Co-authored-by: aitelint <[email protected]>
  • Loading branch information
aitelint and aitelint authored Jul 13, 2022
1 parent 73e8247 commit f4b0e70
Show file tree
Hide file tree
Showing 9 changed files with 387 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ public static class DateTimeDefinitions
// Number words "eleven" .. "nineteen".
public const string WrittenElevenToNineteenRegex = @"(?:eleven|twelve|(?:thir|four|fif|six|seven|eigh|nine)teen)";
// Tens words "ten" .. "ninety"; `fou?rty` accepts both "forty" and the spelling "fourty".
public const string WrittenTensRegex = @"(?:ten|twenty|thirty|fou?rty|fifty|sixty|seventy|eighty|ninety)";
// A {WrittenOneToNineRegex} word (defined outside this hunk), an eleven-to-nineteen word,
// or a tens word optionally followed by whitespace and a one-to-nine word.
public static readonly string WrittenNumRegex = $@"(?:{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}(\s+{WrittenOneToNineRegex})?)";
// Ordinal words "first" .. "ninth"; `nine?th` also accepts the spelling "nineth".
public const string WrittenOneToNineOrdinalRegex = @"(?:first|second|third|fourth|fifth|sixth|seventh|eighth|nine?th)";
// Single-word ordinals from "tenth" up: despite the name this also lists "eleventh" .. "nineteenth"
// in addition to the round tens "twentieth" .. "ninetieth".
public const string WrittenTensOrdinalRegex = @"(?:tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth|fortieth|fiftieth|sixtieth|seventieth|eightieth|ninetieth)";
// Any written ordinal: a single ordinal word, or a tens word + whitespace + a one-to-nine ordinal
// (e.g. "twenty first").
// NOTE(review): the separator is `\s+` only, so hyphenated forms such as "twenty-first" are not
// matched by this pattern — confirm whether they are handled elsewhere.
public static readonly string WrittenOrdinalRegex = $@"(?:{WrittenOneToNineOrdinalRegex}|{WrittenTensOrdinalRegex}|{WrittenTensRegex}\s+{WrittenOneToNineOrdinalRegex})";
// Written-out day of the month, optionally preceded by "the": "first" .. "thirtieth" as single words,
// "<ten|twenty> <first..ninth>", or "thirty first".
// The `ordinal` group wraps the whole content of the `day` group, so for a written day both groups
// capture the same text; ParseSimpleCases in the date-period parser compares the two captures to
// tell a written day from a numeric {DayRegex} day.
// NOTE(review): the `ten` alternative in `(?:ten|twenty)\s+...` admits phrases like "ten first";
// days 11-19 are already covered by the single-word list — confirm whether `ten` is intended.
public static readonly string WrittenOrdinalDayRegex = $@"\b(the\s+)?(?<day>(?<ordinal>{WrittenOneToNineOrdinalRegex}|(?:tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth)|(?:ten|twenty)\s+{WrittenOneToNineOrdinalRegex}|thirty\s+first))\b";
// "one thousand" / "two thousand", optionally followed by "[and] <one-to-nine> hundred".
public static readonly string WrittenCenturyFullYearRegex = $@"(?:(one|two)\s+thousand((\s+and)?\s+{WrittenOneToNineRegex}\s+hundred)?)";
// Number words "one" through "twenty two" (the alternatives listed are cardinal words).
public const string WrittenCenturyOrdinalYearRegex = @"(?:twenty(\s+(one|two))?|ten|eleven|twelve|thirteen|fifteen|eighteen|(?:four|six|seven|nine)(teen)?|one|two|three|five|eight)";
// Captures into the `century` group either the full "thousand" form above or one of the number
// words optionally followed by "hundred".
public static readonly string CenturyRegex = $@"\b(?<century>{WrittenCenturyFullYearRegex}|{WrittenCenturyOrdinalYearRegex}(\s+hundred)?)\b";
Expand Down Expand Up @@ -78,10 +82,10 @@ public static class DateTimeDefinitions
// Token patterns anchored with `$`: the token must be the last thing in the text being tested.
public const string ToTokenRegex = @"\b(to)$";
public const string FromRegex = @"\b(from(\s+the)?)$";
public const string BetweenTokenRegex = @"\b(between(\s+the)?)$";
// Diff view: the next four declarations appear to be the pre-change versions (the same names are
// declared again immediately after). Both day slots of the range accept only {DayRegex}.
public static readonly string SimpleCasesRegex = $@"\b({RangePrefixRegex}\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex}\s+{MonthSuffixRegex}|{MonthSuffixRegex}\s+{DayRegex})((\s+|\s*,\s*){YearRegex})?\b";
public static readonly string MonthFrontSimpleCasesRegex = $@"\b({RangePrefixRegex}\s+)?{MonthSuffixRegex}\s+((from)\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b";
public static readonly string MonthFrontBetweenRegex = $@"\b{MonthSuffixRegex}\s+(between\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b";
public static readonly string BetweenRegex = $@"\b(between\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b";
// Replacement versions: every day slot is now ({DayRegex}|{WrittenOrdinalDayRegex}), so either end
// of the range may be numeric or written out (the commit title's example: "April ninth through 15th").
//   SimpleCasesRegex           - [prefix] <day> {TillRegex} (<day> <month> | <month> <day>) [year]
//   MonthFrontSimpleCasesRegex - [prefix] <month> [from] <day> {TillRegex} <day> [year]
//   MonthFrontBetweenRegex     - <month> between <day> {RangeConnectorRegex} <day> [year]
//   BetweenRegex               - between <day> {RangeConnectorRegex} <day> <month> [year]
// The optional year may be separated by whitespace or by a comma.
public static readonly string SimpleCasesRegex = $@"\b({RangePrefixRegex}\s+)?({DayRegex}|{WrittenOrdinalDayRegex})\s*{TillRegex}\s*(({DayRegex}|{WrittenOrdinalDayRegex})\s+{MonthSuffixRegex}|{MonthSuffixRegex}\s+({DayRegex}|{WrittenOrdinalDayRegex}))((\s+|\s*,\s*){YearRegex})?\b";
public static readonly string MonthFrontSimpleCasesRegex = $@"\b({RangePrefixRegex}\s+)?{MonthSuffixRegex}\s+((from)\s+)?({DayRegex}|{WrittenOrdinalDayRegex})\s*{TillRegex}\s*({DayRegex}|{WrittenOrdinalDayRegex})((\s+|\s*,\s*){YearRegex})?\b";
public static readonly string MonthFrontBetweenRegex = $@"\b{MonthSuffixRegex}\s+(between\s+)({DayRegex}|{WrittenOrdinalDayRegex})\s*{RangeConnectorRegex}\s*({DayRegex}|{WrittenOrdinalDayRegex})((\s+|\s*,\s*){YearRegex})?\b";
public static readonly string BetweenRegex = $@"\b(between\s+)({DayRegex}|{WrittenOrdinalDayRegex})\s*{RangeConnectorRegex}\s*({DayRegex}|{WrittenOrdinalDayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b";
// A month joined to a year in either order: a written month (or "first/1st .. twelfth/12th/last month"
// followed by "of"/"in") and then a year or "following/next/last/this year", or the year first and
// the written month second.
public static readonly string MonthWithYear = $@"\b((({WrittenMonthRegex}[\.]?|((the\s+)?(?<cardinal>first|1st|second|2nd|third|3rd|fourth|4th|fifth|5th|sixth|6th|seventh|7th|eighth|8th|ninth|9th|tenth|10th|eleventh|11th|twelfth|12th|last)\s+month(?=\s+(of|in))))((\s*)[/\\\-\.,]?(\s+(of|in))?(\s*)({YearRegex}|(?<order>following|next|last|this)\s+year)|\s+(of|in)\s+{TwoDigitYearRegex}))|(({YearRegex}|(?<order>following|next|last|this)\s+year)(\s*),?(\s*){WrittenMonthRegex}))\b";
// Year qualifiers: "calendar", or "fiscal"/"school" captured in the `special` group.
public const string SpecialYearPrefixes = @"(calendar|(?<special>fiscal|school))";
// Single-term periods: an optionally relative month, "month/year to date", "(un)til(l)/to date"
// (group `toDate`), or an optionally relative week / weekend / month / fortnight / year
// (working week captured as `business`; year may carry a {SpecialYearPrefixes} qualifier).
public static readonly string OneWordPeriodRegex = $@"\b((((the\s+)?month of\s+)?({StrictRelativeRegex}\s+)?{MonthRegex})|(month|year) to date|(?<toDate>((un)?till?|to)\s+date)|({RelativeRegex}\s+)?(my\s+)?((?<business>working\s+week|workweek)|week(end)?|month|fortnight|(({SpecialYearPrefixes}\s+)?year))(?!((\s+of)?\s+\d+(?!({BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex}))|\s+to\s+date))(\s+{AfterNextSuffixRegex})?)\b";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public static class NumbersDefinitions
public const string DecimalUnitsRegex = @"(?:डेढ़|डेढ़|डेढ|ढाई|सवा|सावा)";
public static readonly string DecimalUnitsWithRoundNumberRegex = $@"({DecimalUnitsRegex}\s+({{AllNumericalIntRegex}}\s+)?{RoundNumberIntegerRegex}|{DecimalUnitsRegex})";
public const string RoundNumberOrdinalRegex = @"(?:(सौ|हजार|हज़ार|लाख|करोड़|अरब|खरब)(वां|वीं|वें|वाँ))";
// Diff view: this first declaration appears to be the pre-change version (the same name is declared
// again on the next line); here 'पहले' matches unconditionally.
public const string OneToNineOrdinalRegex = @"(?:पहला|पहले|पहली|तीसरे|प्रथम|दूसरा|दूसरी|दूसरे|तिहाई|चौथाई|((पांच|पाँच|छठ|सात|आठ|नौ)(वां|वीं|वें|वाँ|वा)))";
// Replacement version: the negative lookbehind (?<!से\s*) stops 'पहले' from matching when it
// directly follows 'से' (optionally separated by whitespace).
// NOTE(review): presumably because 'से पहले' is the postposition "before" rather than the ordinal
// "first" — confirm with a Hindi speaker.
public const string OneToNineOrdinalRegex = @"(?:पहला|(?<!से\s*)पहले|पहली|तीसरे|प्रथम|दूसरा|दूसरी|दूसरे|तिहाई|चौथाई|((पांच|पाँच|छठ|सात|आठ|नौ)(वां|वीं|वें|वाँ|वा)))";
public const string TenToNineteenOrdinalRegex = @"(?:(दस|ग्यारह|बारह|तेरह|चौदह|पंद्रह|सोलह|सत्रह|अठारह|उन्नीस)(वां|वीं|वें|वाँ))";
public const string TwentyToTwentyNineOrdinalRegex = @"(?:(बीस|इक्कीस|बाईस|बाइस|तेईस|तेइस|चौबीस|पच्चीस|छब्बीस|सत्ताईस|सत्ताइस|अट्ठाईस|अट्ठाइस|उनतीस)(वां|वीं|वें|वाँ))";
public const string ThirtyToThirtyNineOrdinalRegex = @"(?:(तीस|इकतीस|इकत्तीस|बत्तीस|तैंतीस|चौंतीस|पैंतीस|छ्त्तीस|सैंतीस|अड़तीस|उनतालीस)(वां|वीं|वें|वाँ))";
Expand Down
5 changes: 5 additions & 0 deletions .NET/Microsoft.Recognizers.Text.DateTime/Constants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ public static class Constants
// SourceEntity Types
public const string SYS_DATETIME_DATETIMEPOINT = "datetimepoint";

// Number Types
// Type tag for ordinal numbers: the date-period parser's ParseSimpleCases sets it on the
// ExtractResult it hands to config.NumberParser when a range day is written out as a word.
public const string SYS_NUMBER_ORDINAL = "builtin.num.ordinal";

// Model Name
public const string MODEL_DATETIME = "datetime";

Expand Down Expand Up @@ -113,6 +116,7 @@ public static class Constants
public const int MaxWeekOfMonth = 5;
public const int MaxMonth = 12;
public const int MinMonth = 1;
// Presumably the largest valid day-of-month value.
// NOTE(review): no use of this constant is visible in this diff — confirm where it is consumed.
public const int MaxDayMonth = 31;

// Day start hour
public const int DayHourStart = 0;
Expand Down Expand Up @@ -242,6 +246,7 @@ public static class Constants
public const string EndGroupName = "end";
public const string WithinGroupName = "within";
public const string NumberGroupName = "number";
// Name of the `ordinal` regex group that WrittenOrdinalDayRegex wraps around a written-out day;
// ParseSimpleCases reads it to tell written days from numeric ones.
public const string OrdinalGroupName = "ordinal";
public const string OrderGroupName = "order";
public const string AgoGroupName = "ago";
public const string YesterdayGroupName = "yesterday";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -348,10 +348,10 @@ private List<ExtractResult> ExtractImpl(string text, DateObject reference)

tokens.AddRange(MergeTwoTimePoints(text, reference));
tokens.AddRange(MatchDuration(text, reference));
tokens.AddRange(SingleTimePointWithPatterns(text, new List<ExtractResult>(ordinalExtractions), reference));
tokens.AddRange(SingleTimePointWithPatterns(text, ordinalExtractions, reference));
tokens.AddRange(MatchComplexCases(text, simpleCasesResults, reference));
tokens.AddRange(MatchYearPeriod(text, reference));
tokens.AddRange(MatchOrdinalNumberWithCenturySuffix(text, new List<ExtractResult>(ordinalExtractions)));
tokens.AddRange(MatchOrdinalNumberWithCenturySuffix(text, ordinalExtractions));

return Token.MergeAllTokens(tokens, text, ExtractorName);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -699,9 +699,47 @@ private DateTimeResolutionResult ParseSimpleCases(string text, DateObject refere

if (match.Success)
{
var days = match.Groups["day"];
beginDay = this.config.DayOfMonth[days.Captures[0].Value];
endDay = this.config.DayOfMonth[days.Captures[1].Value];
var days = match.Groups[Constants.DayGroupName];
var writtenDay = match.Groups[Constants.OrdinalGroupName];
if (writtenDay.Captures.Count > 0 && days.Captures[0].Value == writtenDay.Captures[0].Value)
{
// Parse beginDay in written form
var dayMatch = writtenDay.Captures[0];
var dayEr = new ExtractResult
{
Start = dayMatch.Index,
Length = dayMatch.Length,
Text = dayMatch.Value,
Type = Constants.SYS_NUMBER_ORDINAL,
Metadata = new Metadata { IsOrdinalRelative = false, },
};
var dayPr = this.config.NumberParser.Parse(dayEr);
beginDay = (int)(double)dayPr.Value;
}
else
{
beginDay = this.config.DayOfMonth[days.Captures[0].Value];
}

if (writtenDay.Captures.Count > 0 && days.Captures[1].Value == writtenDay.Captures[writtenDay.Captures.Count - 1].Value)
{
// Parse endDay in written form
var dayMatch = writtenDay.Captures[writtenDay.Captures.Count - 1];
var dayEr = new ExtractResult
{
Start = dayMatch.Index,
Length = dayMatch.Length,
Text = dayMatch.Value,
Type = Constants.SYS_NUMBER_ORDINAL,
Metadata = new Metadata { IsOrdinalRelative = false, },
};
var dayPr = this.config.NumberParser.Parse(dayEr);
endDay = (int)(double)dayPr.Value;
}
else
{
endDay = this.config.DayOfMonth[days.Captures[1].Value];
}

// parse year
year = config.DateExtractor.GetYearFromText(match.Match);
Expand Down
26 changes: 18 additions & 8 deletions Patterns/English/English-DateTime.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,16 @@ WrittenTensRegex: !simpleRegex
# A one-to-nine word, an eleven-to-nineteen word, or a tens word optionally followed by a
# one-to-nine word.
WrittenNumRegex: !nestedRegex
def: (?:{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}(\s+{WrittenOneToNineRegex})?)
references: [ WrittenOneToNineRegex, WrittenElevenToNineteenRegex, WrittenTensRegex ]
# Ordinal words "first" .. "ninth"; `nine?th` also accepts the spelling "nineth".
WrittenOneToNineOrdinalRegex: !simpleRegex
def: (?:first|second|third|fourth|fifth|sixth|seventh|eighth|nine?th)
# Single-word ordinals from "tenth" up: despite the name this also lists "eleventh" .. "nineteenth"
# in addition to the round tens "twentieth" .. "ninetieth".
WrittenTensOrdinalRegex: !simpleRegex
def: (?:tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth|fortieth|fiftieth|sixtieth|seventieth|eightieth|ninetieth)
# Any written ordinal: a single ordinal word, or a tens word + whitespace + a one-to-nine ordinal.
# NOTE(review): the separator is `\s+` only, so hyphenated forms such as "twenty-first" are not
# matched by this pattern — confirm whether they are handled elsewhere.
WrittenOrdinalRegex: !nestedRegex
def: (?:{WrittenOneToNineOrdinalRegex}|{WrittenTensOrdinalRegex}|{WrittenTensRegex}\s+{WrittenOneToNineOrdinalRegex})
references: [ WrittenOneToNineOrdinalRegex, WrittenTensOrdinalRegex, WrittenTensRegex ]
# Written-out day of the month, optionally preceded by "the": "first" .. "thirtieth" as single
# words, "<ten|twenty> <first..ninth>", or "thirty first". The `ordinal` group wraps the whole
# content of the `day` group, so both capture the same text for a written day.
# The C# DateTimeDefinitions class in this commit carries the same pattern string; keep the two in sync.
# NOTE(review): the `ten` alternative admits phrases like "ten first"; days 11-19 are already
# covered by the single-word list — confirm whether `ten` is intended.
WrittenOrdinalDayRegex: !nestedRegex
def: \b(the\s+)?(?<day>(?<ordinal>{WrittenOneToNineOrdinalRegex}|(?:tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth)|(?:ten|twenty)\s+{WrittenOneToNineOrdinalRegex}|thirty\s+first))\b
references: [ WrittenOneToNineOrdinalRegex ]
# "one thousand" / "two thousand", optionally followed by "[and] <one-to-nine> hundred".
WrittenCenturyFullYearRegex: !nestedRegex
def: (?:(one|two)\s+thousand((\s+and)?\s+{WrittenOneToNineRegex}\s+hundred)?)
references: [ WrittenOneToNineRegex]
Expand Down Expand Up @@ -137,17 +147,17 @@ FromRegex: !simpleRegex
# "between", optionally followed by "the", at the very end of the tested text (`$`).
BetweenTokenRegex: !simpleRegex
def: \b(between(\s+the)?)$
# Diff view for the four entries below: each shows two def/references pairs. The first pair
# appears to be the pre-change version (numeric {DayRegex} only); the second pair is the
# replacement, where every day slot is ({DayRegex}|{WrittenOrdinalDayRegex}) and
# WrittenOrdinalDayRegex is added to the references list.
# Shape: [prefix] <day> {TillRegex} (<day> <month> | <month> <day>) [year]
SimpleCasesRegex: !nestedRegex
def: \b({RangePrefixRegex}\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex}\s+{MonthSuffixRegex}|{MonthSuffixRegex}\s+{DayRegex})((\s+|\s*,\s*){YearRegex})?\b
references: [ DayRegex, TillRegex, MonthSuffixRegex, YearRegex, RangePrefixRegex ]
def: \b({RangePrefixRegex}\s+)?({DayRegex}|{WrittenOrdinalDayRegex})\s*{TillRegex}\s*(({DayRegex}|{WrittenOrdinalDayRegex})\s+{MonthSuffixRegex}|{MonthSuffixRegex}\s+({DayRegex}|{WrittenOrdinalDayRegex}))((\s+|\s*,\s*){YearRegex})?\b
references: [ DayRegex, TillRegex, MonthSuffixRegex, YearRegex, RangePrefixRegex, WrittenOrdinalDayRegex ]
# Shape: [prefix] <month> [from] <day> {TillRegex} <day> [year]
MonthFrontSimpleCasesRegex: !nestedRegex
def: \b({RangePrefixRegex}\s+)?{MonthSuffixRegex}\s+((from)\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b
references: [ MonthSuffixRegex, DayRegex, TillRegex, YearRegex, RangePrefixRegex ]
def: \b({RangePrefixRegex}\s+)?{MonthSuffixRegex}\s+((from)\s+)?({DayRegex}|{WrittenOrdinalDayRegex})\s*{TillRegex}\s*({DayRegex}|{WrittenOrdinalDayRegex})((\s+|\s*,\s*){YearRegex})?\b
references: [ MonthSuffixRegex, DayRegex, TillRegex, YearRegex, RangePrefixRegex, WrittenOrdinalDayRegex ]
# Shape: <month> between <day> {RangeConnectorRegex} <day> [year]
MonthFrontBetweenRegex: !nestedRegex
def: \b{MonthSuffixRegex}\s+(between\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b
references: [ MonthSuffixRegex, DayRegex, RangeConnectorRegex , YearRegex ]
def: \b{MonthSuffixRegex}\s+(between\s+)({DayRegex}|{WrittenOrdinalDayRegex})\s*{RangeConnectorRegex}\s*({DayRegex}|{WrittenOrdinalDayRegex})((\s+|\s*,\s*){YearRegex})?\b
references: [ MonthSuffixRegex, DayRegex, RangeConnectorRegex , YearRegex, WrittenOrdinalDayRegex ]
# Shape: between <day> {RangeConnectorRegex} <day> <month> [year]
BetweenRegex: !nestedRegex
def: \b(between\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b
references: [ DayRegex, RangeConnectorRegex , MonthSuffixRegex, YearRegex ]
def: \b(between\s+)({DayRegex}|{WrittenOrdinalDayRegex})\s*{RangeConnectorRegex}\s*({DayRegex}|{WrittenOrdinalDayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b
references: [ DayRegex, RangeConnectorRegex , MonthSuffixRegex, YearRegex, WrittenOrdinalDayRegex ]
# A month joined to a year in either order: a written month (or "first/1st .. twelfth/12th/last
# month" followed by "of"/"in") and then a year or "following/next/last/this year", or the year
# first and the written month second.
MonthWithYear: !nestedRegex
def: \b((({WrittenMonthRegex}[\.]?|((the\s+)?(?<cardinal>first|1st|second|2nd|third|3rd|fourth|4th|fifth|5th|sixth|6th|seventh|7th|eighth|8th|ninth|9th|tenth|10th|eleventh|11th|twelfth|12th|last)\s+month(?=\s+(of|in))))((\s*)[/\\\-\.,]?(\s+(of|in))?(\s*)({YearRegex}|(?<order>following|next|last|this)\s+year)|\s+(of|in)\s+{TwoDigitYearRegex}))|(({YearRegex}|(?<order>following|next|last|this)\s+year)(\s*),?(\s*){WrittenMonthRegex}))\b
references: [ WrittenMonthRegex, YearRegex, TwoDigitYearRegex ]
Expand Down
2 changes: 1 addition & 1 deletion Patterns/Hindi/Hindi-Numbers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ DecimalUnitsWithRoundNumberRegex: !nestedRegex
RoundNumberOrdinalRegex: !simpleRegex
def: (?:(सौ|हजार|हज़ार|लाख|करोड़|अरब|खरब)(वां|वीं|वें|वाँ))
# Diff view: two def lines are shown. The first appears to be the pre-change pattern, where 'पहले'
# matches unconditionally; the second is the replacement, whose negative lookbehind (?<!से\s*)
# stops 'पहले' from matching when it directly follows 'से' (optionally separated by whitespace).
# NOTE(review): presumably because 'से पहले' is the postposition "before" rather than the ordinal
# "first" — confirm with a Hindi speaker.
OneToNineOrdinalRegex: !simpleRegex
def: (?:पहला|पहले|पहली|तीसरे|प्रथम|दूसरा|दूसरी|दूसरे|तिहाई|चौथाई|((पांच|पाँच|छठ|सात|आठ|नौ)(वां|वीं|वें|वाँ|वा)))
def: (?:पहला|(?<!से\s*)पहले|पहली|तीसरे|प्रथम|दूसरा|दूसरी|दूसरे|तिहाई|चौथाई|((पांच|पाँच|छठ|सात|आठ|नौ)(वां|वीं|वें|वाँ|वा)))
TenToNineteenOrdinalRegex: !simpleRegex
def: (?:(दस|ग्यारह|बारह|तेरह|चौदह|पंद्रह|सोलह|सत्रह|अठारह|उन्नीस)(वां|वीं|वें|वाँ))
TwentyToTwentyNineOrdinalRegex: !simpleRegex
Expand Down
Loading

0 comments on commit f4b0e70

Please sign in to comment.